From 26a9fbc416cc8afaf2099ce293334e85c76b50cb Mon Sep 17 00:00:00 2001 From: unknown Date: Mon, 24 Jun 2013 10:50:25 +0200 Subject: [PATCH 01/41] MDEV-4506: Parallel replication of group-committed transactions: Intermediate commit First very rough sketch. We spawn and retire a pool of slave threads. Test main.alias works, most likely not much else does. --- sql/CMakeLists.txt | 2 +- sql/log.cc | 16 +- sql/log.h | 5 +- sql/log_event.cc | 55 ++- sql/log_event.h | 13 +- sql/mysqld.cc | 19 +- sql/mysqld.h | 8 +- sql/rpl_parallel.cc | 509 ++++++++++++++++++++++++++++ sql/rpl_parallel.h | 74 ++++ sql/rpl_rli.h | 2 + sql/share/errmsg-utf8.txt | 2 + sql/slave.cc | 14 +- sql/slave.h | 4 +- sql/sys_vars.cc | 46 +++ storage/federatedx/ha_federatedx.cc | 1 - storage/sphinx/snippets_udf.cc | 16 +- 16 files changed, 747 insertions(+), 39 deletions(-) create mode 100644 sql/rpl_parallel.cc create mode 100644 sql/rpl_parallel.h diff --git a/sql/CMakeLists.txt b/sql/CMakeLists.txt index 36ab121cadf..2d7499c8b9e 100644 --- a/sql/CMakeLists.txt +++ b/sql/CMakeLists.txt @@ -88,7 +88,7 @@ SET (SQL_SOURCE threadpool_common.cc ../sql-common/mysql_async.c my_apc.cc my_apc.h - rpl_gtid.cc + rpl_gtid.cc rpl_parallel.cc ${GEN_SOURCES} ${MYSYS_LIBWRAP_SOURCE} ) diff --git a/sql/log.cc b/sql/log.cc index 19fc3cc7b6f..d312f4bc936 100644 --- a/sql/log.cc +++ b/sql/log.cc @@ -5355,7 +5355,7 @@ MYSQL_BIN_LOG::flush_and_set_pending_rows_event(THD *thd, /* Generate a new global transaction ID, and write it to the binlog */ bool MYSQL_BIN_LOG::write_gtid_event(THD *thd, bool standalone, - bool is_transactional) + bool is_transactional, uint64 commit_id) { rpl_gtid gtid; uint32 domain_id= thd->variables.gtid_domain_id; @@ -5393,7 +5393,8 @@ MYSQL_BIN_LOG::write_gtid_event(THD *thd, bool standalone, return true; Gtid_log_event gtid_event(thd, seq_no, domain_id, standalone, - LOG_EVENT_SUPPRESS_USE_F, is_transactional); + LOG_EVENT_SUPPRESS_USE_F, is_transactional, + commit_id); /* Write the event to 
the binary log. */ if (gtid_event.write(&mysql_bin_log.log_file)) @@ -5651,7 +5652,7 @@ bool MYSQL_BIN_LOG::write(Log_event *event_info, my_bool *with_annotate) my_org_b_tell= my_b_tell(file); mysql_mutex_lock(&LOCK_log); prev_binlog_id= current_binlog_id; - if (write_gtid_event(thd, true, using_trans)) + if (write_gtid_event(thd, true, using_trans, 0)) goto err; } else @@ -6667,6 +6668,7 @@ MYSQL_BIN_LOG::trx_group_commit_leader(group_commit_entry *leader) group_commit_entry *queue= NULL; bool check_purge= false; ulong binlog_id; + uint64 commit_id; DBUG_ENTER("MYSQL_BIN_LOG::trx_group_commit_leader"); LINT_INIT(binlog_id); @@ -6701,6 +6703,7 @@ MYSQL_BIN_LOG::trx_group_commit_leader(group_commit_entry *leader) DBUG_ASSERT(is_open()); if (likely(is_open())) // Should always be true { + commit_id= (last_in_queue == leader ? 0 : (uint64)leader->thd->query_id); /* Commit every transaction in the queue. @@ -6721,7 +6724,7 @@ MYSQL_BIN_LOG::trx_group_commit_leader(group_commit_entry *leader) */ DBUG_ASSERT(!cache_mngr->stmt_cache.empty() || !cache_mngr->trx_cache.empty()); - if ((current->error= write_transaction_or_stmt(current))) + if ((current->error= write_transaction_or_stmt(current, commit_id))) current->commit_errno= errno; strmake_buf(cache_mngr->last_commit_pos_file, log_file_name); @@ -6896,11 +6899,12 @@ MYSQL_BIN_LOG::trx_group_commit_leader(group_commit_entry *leader) int -MYSQL_BIN_LOG::write_transaction_or_stmt(group_commit_entry *entry) +MYSQL_BIN_LOG::write_transaction_or_stmt(group_commit_entry *entry, + uint64 commit_id) { binlog_cache_mngr *mngr= entry->cache_mngr; - if (write_gtid_event(entry->thd, false, entry->using_trx_cache)) + if (write_gtid_event(entry->thd, false, entry->using_trx_cache, commit_id)) return ER_ERROR_ON_WRITE; if (entry->using_stmt_cache && !mngr->stmt_cache.empty() && diff --git a/sql/log.h b/sql/log.h index 018ac64eff7..0b1344aa523 100644 --- a/sql/log.h +++ b/sql/log.h @@ -525,7 +525,7 @@ class MYSQL_BIN_LOG: public TC_LOG, 
private MYSQL_LOG int new_file_impl(bool need_lock); void do_checkpoint_request(ulong binlog_id); void purge(); - int write_transaction_or_stmt(group_commit_entry *entry); + int write_transaction_or_stmt(group_commit_entry *entry, uint64 commit_id); bool write_transaction_to_binlog_events(group_commit_entry *entry); void trx_group_commit_leader(group_commit_entry *leader); bool is_xidlist_idle_nolock(); @@ -775,7 +775,8 @@ public: inline uint32 get_open_count() { return open_count; } void set_status_variables(THD *thd); bool is_xidlist_idle(); - bool write_gtid_event(THD *thd, bool standalone, bool is_transactional); + bool write_gtid_event(THD *thd, bool standalone, bool is_transactional, + uint64 commit_id); int read_state_from_file(); int write_state_to_file(); int get_most_recent_gtid_list(rpl_gtid **list, uint32 *size); diff --git a/sql/log_event.cc b/sql/log_event.cc index 3076cbb1766..431f8b47f2d 100644 --- a/sql/log_event.cc +++ b/sql/log_event.cc @@ -6101,6 +6101,18 @@ Gtid_log_event::Gtid_log_event(const char *buf, uint event_len, domain_id= uint4korr(buf); buf+= 4; flags2= *buf; + if (flags2 & FL_GROUP_COMMIT_ID) + { + if (event_len < (uint)header_size + GTID_HEADER_LEN + 2) + { + seq_no= 0; // So is_valid() returns false + return; + } + ++buf; + commit_id= uint8korr(buf); + } + else + commit_id= 0; } @@ -6108,10 +6120,11 @@ Gtid_log_event::Gtid_log_event(const char *buf, uint event_len, Gtid_log_event::Gtid_log_event(THD *thd_arg, uint64 seq_no_arg, uint32 domain_id_arg, bool standalone, - uint16 flags_arg, bool is_transactional) + uint16 flags_arg, bool is_transactional, + uint64 commit_id_arg) : Log_event(thd_arg, flags_arg, is_transactional), - seq_no(seq_no_arg), domain_id(domain_id_arg), - flags2(standalone ? FL_STANDALONE : 0) + seq_no(seq_no_arg), commit_id(commit_id_arg), domain_id(domain_id_arg), + flags2((standalone ? FL_STANDALONE : 0) | (commit_id_arg ? 
FL_GROUP_COMMIT_ID : 0)) { cache_type= Log_event::EVENT_NO_CACHE; } @@ -6156,13 +6169,24 @@ Gtid_log_event::peek(const char *event_start, size_t event_len, bool Gtid_log_event::write(IO_CACHE *file) { - uchar buf[GTID_HEADER_LEN]; + uchar buf[GTID_HEADER_LEN+2]; + size_t write_len; + int8store(buf, seq_no); int4store(buf+8, domain_id); buf[12]= flags2; - bzero(buf+13, GTID_HEADER_LEN-13); - return write_header(file, GTID_HEADER_LEN) || - wrapper_my_b_safe_write(file, buf, GTID_HEADER_LEN) || + if (flags2 & FL_GROUP_COMMIT_ID) + { + int8store(buf+13, commit_id); + write_len= GTID_HEADER_LEN + 2; + } + else + { + bzero(buf+13, GTID_HEADER_LEN-13); + write_len= GTID_HEADER_LEN; + } + return write_header(file, write_len) || + wrapper_my_b_safe_write(file, buf, write_len) || write_footer(file); } @@ -6201,7 +6225,7 @@ Gtid_log_event::make_compatible_event(String *packet, bool *need_dummy_event, void Gtid_log_event::pack_info(THD *thd, Protocol *protocol) { - char buf[6+5+10+1+10+1+20+1]; + char buf[6+5+10+1+10+1+20+1+4+20+1]; char *p; p = strmov(buf, (flags2 & FL_STANDALONE ? 
"GTID " : "BEGIN GTID ")); p= longlong10_to_str(domain_id, p, 10); @@ -6209,6 +6233,11 @@ Gtid_log_event::pack_info(THD *thd, Protocol *protocol) p= longlong10_to_str(server_id, p, 10); *p++= '-'; p= longlong10_to_str(seq_no, p, 10); + if (flags2 & FL_GROUP_COMMIT_ID) + { + p= strmov(p, " cid="); + p= longlong10_to_str(commit_id, p, 10); + } protocol->store(buf, p-buf, &my_charset_bin); } @@ -6295,12 +6324,20 @@ Gtid_log_event::print(FILE *file, PRINT_EVENT_INFO *print_event_info) Write_on_release_cache cache(&print_event_info->head_cache, file, Write_on_release_cache::FLUSH_F); char buf[21]; + char buf2[21]; if (!print_event_info->short_form) { print_header(&cache, print_event_info, FALSE); longlong10_to_str(seq_no, buf, 10); - my_b_printf(&cache, "\tGTID %u-%u-%s\n", domain_id, server_id, buf); + if (flags2 & FL_GROUP_COMMIT_ID) + { + longlong10_to_str(commit_id, buf2, 10); + my_b_printf(&cache, "\tGTID %u-%u-%s cid=%s\n", + domain_id, server_id, buf, buf2); + } + else + my_b_printf(&cache, "\tGTID %u-%u-%s\n", domain_id, server_id, buf); if (!print_event_info->domain_id_printed || print_event_info->domain_id != domain_id) diff --git a/sql/log_event.h b/sql/log_event.h index b54e2028ef2..641ab3e37b7 100644 --- a/sql/log_event.h +++ b/sql/log_event.h @@ -3080,6 +3080,7 @@ class Gtid_log_event: public Log_event { public: uint64 seq_no; + uint64 commit_id; uint32 domain_id; uchar flags2; @@ -3087,10 +3088,15 @@ public: /* FL_STANDALONE is set when there is no terminating COMMIT event. */ static const uchar FL_STANDALONE= 1; + /* + FL_GROUP_COMMIT_ID is set when event group is part of a group commit on the + master. Groups with same commit_id are part of the same group commit. 
+ */ + static const uchar FL_GROUP_COMMIT_ID= 2; #ifdef MYSQL_SERVER Gtid_log_event(THD *thd_arg, uint64 seq_no, uint32 domain_id, bool standalone, - uint16 flags, bool is_transactional); + uint16 flags, bool is_transactional, uint64 commit_id); #ifdef HAVE_REPLICATION void pack_info(THD *thd, Protocol *protocol); virtual int do_apply_event(Relay_log_info const *rli); @@ -3104,7 +3110,10 @@ public: const Format_description_log_event *description_event); ~Gtid_log_event() { } Log_event_type get_type_code() { return GTID_EVENT; } - int get_data_size() { return GTID_HEADER_LEN; } + int get_data_size() + { + return GTID_HEADER_LEN + ((flags2 & FL_GROUP_COMMIT_ID) ? 2 : 0); + } bool is_valid() const { return seq_no != 0; } #ifdef MYSQL_SERVER bool write(IO_CACHE *file); diff --git a/sql/mysqld.cc b/sql/mysqld.cc index d5af1634a8a..4e2679f1c91 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -543,6 +543,8 @@ ulong rpl_recovery_rank=0; */ ulong stored_program_cache_size= 0; +ulong opt_slave_parallel_threads= 0; + const double log_10[] = { 1e000, 1e001, 1e002, 1e003, 1e004, 1e005, 1e006, 1e007, 1e008, 1e009, 1e010, 1e011, 1e012, 1e013, 1e014, 1e015, 1e016, 1e017, 1e018, 1e019, @@ -769,7 +771,8 @@ PSI_mutex_key key_BINLOG_LOCK_index, key_BINLOG_LOCK_xid_list, key_LOCK_thread_count, key_LOCK_thread_cache, key_PARTITION_LOCK_auto_inc; PSI_mutex_key key_RELAYLOG_LOCK_index; -PSI_mutex_key key_LOCK_slave_state, key_LOCK_binlog_state; +PSI_mutex_key key_LOCK_slave_state, key_LOCK_binlog_state, + key_LOCK_rpl_thread, key_LOCK_rpl_thread_pool; PSI_mutex_key key_LOCK_stats, key_LOCK_global_user_client_stats, key_LOCK_global_table_stats, @@ -844,7 +847,9 @@ static PSI_mutex_info all_server_mutexes[]= { &key_LOCK_thread_cache, "LOCK_thread_cache", PSI_FLAG_GLOBAL}, { &key_PARTITION_LOCK_auto_inc, "HA_DATA_PARTITION::LOCK_auto_inc", 0}, { &key_LOCK_slave_state, "LOCK_slave_state", 0}, - { &key_LOCK_binlog_state, "LOCK_binlog_state", 0} + { &key_LOCK_binlog_state, "LOCK_binlog_state", 
0}, + { &key_LOCK_rpl_thread, "LOCK_rpl_thread", 0}, + { &key_LOCK_rpl_thread_pool, "LOCK_rpl_thread_pool", 0} }; PSI_rwlock_key key_rwlock_LOCK_grant, key_rwlock_LOCK_logger, @@ -886,6 +891,7 @@ PSI_cond_key key_BINLOG_COND_xid_list, key_BINLOG_update_cond, PSI_cond_key key_RELAYLOG_update_cond, key_COND_wakeup_ready; PSI_cond_key key_RELAYLOG_COND_queue_busy; PSI_cond_key key_TC_LOG_MMAP_COND_queue_busy; +PSI_cond_key key_COND_rpl_thread, key_COND_rpl_thread_pool; static PSI_cond_info all_server_conds[]= { @@ -926,13 +932,15 @@ static PSI_cond_info all_server_conds[]= { &key_user_level_lock_cond, "User_level_lock::cond", 0}, { &key_COND_thread_count, "COND_thread_count", PSI_FLAG_GLOBAL}, { &key_COND_thread_cache, "COND_thread_cache", PSI_FLAG_GLOBAL}, - { &key_COND_flush_thread_cache, "COND_flush_thread_cache", PSI_FLAG_GLOBAL} + { &key_COND_flush_thread_cache, "COND_flush_thread_cache", PSI_FLAG_GLOBAL}, + { &key_COND_rpl_thread, "COND_rpl_thread", 0}, + { &key_COND_rpl_thread_pool, "COND_rpl_thread_pool", 0} }; PSI_thread_key key_thread_bootstrap, key_thread_delayed_insert, key_thread_handle_manager, key_thread_main, key_thread_one_connection, key_thread_signal_hand, - key_thread_slave_init; + key_thread_slave_init, key_rpl_parallel_thread; static PSI_thread_info all_server_threads[]= { @@ -958,7 +966,8 @@ static PSI_thread_info all_server_threads[]= { &key_thread_main, "main", PSI_FLAG_GLOBAL}, { &key_thread_one_connection, "one_connection", 0}, { &key_thread_signal_hand, "signal_handler", PSI_FLAG_GLOBAL}, - { &key_thread_slave_init, "slave_init", PSI_FLAG_GLOBAL} + { &key_thread_slave_init, "slave_init", PSI_FLAG_GLOBAL}, + { &key_rpl_parallel_thread, "rpl_parallel_thread", 0} }; PSI_file_key key_file_binlog, key_file_binlog_index, key_file_casetest, diff --git a/sql/mysqld.h b/sql/mysqld.h index 02d6b41cf69..ff2dfffa991 100644 --- a/sql/mysqld.h +++ b/sql/mysqld.h @@ -176,6 +176,7 @@ extern ulong slave_max_allowed_packet; extern ulong 
opt_binlog_rows_event_max_size; extern ulong rpl_recovery_rank, thread_cache_size; extern ulong stored_program_cache_size; +extern ulong opt_slave_parallel_threads; extern ulong back_log; extern ulong executed_events; extern char language[FN_REFLEN]; @@ -247,7 +248,8 @@ extern PSI_mutex_key key_BINLOG_LOCK_index, key_BINLOG_LOCK_xid_list, key_structure_guard_mutex, key_TABLE_SHARE_LOCK_ha_data, key_LOCK_error_messages, key_LOCK_thread_count, key_PARTITION_LOCK_auto_inc; extern PSI_mutex_key key_RELAYLOG_LOCK_index; -extern PSI_mutex_key key_LOCK_slave_state, key_LOCK_binlog_state; +extern PSI_mutex_key key_LOCK_slave_state, key_LOCK_binlog_state, + key_LOCK_rpl_thread, key_LOCK_rpl_thread_pool; extern PSI_mutex_key key_LOCK_stats, key_LOCK_global_user_client_stats, key_LOCK_global_table_stats, @@ -280,10 +282,12 @@ extern PSI_cond_key key_BINLOG_COND_xid_list, key_BINLOG_update_cond, extern PSI_cond_key key_RELAYLOG_update_cond, key_COND_wakeup_ready; extern PSI_cond_key key_RELAYLOG_COND_queue_busy; extern PSI_cond_key key_TC_LOG_MMAP_COND_queue_busy; +extern PSI_cond_key key_COND_rpl_thread, key_COND_rpl_thread_pool; extern PSI_thread_key key_thread_bootstrap, key_thread_delayed_insert, key_thread_handle_manager, key_thread_kill_server, key_thread_main, - key_thread_one_connection, key_thread_signal_hand, key_thread_slave_init; + key_thread_one_connection, key_thread_signal_hand, key_thread_slave_init, + key_rpl_parallel_thread; extern PSI_file_key key_file_binlog, key_file_binlog_index, key_file_casetest, key_file_dbopt, key_file_des_key_file, key_file_ERRMSG, key_select_to_file, diff --git a/sql/rpl_parallel.cc b/sql/rpl_parallel.cc new file mode 100644 index 00000000000..a4a87c1e92e --- /dev/null +++ b/sql/rpl_parallel.cc @@ -0,0 +1,509 @@ +#include "my_global.h" +#include "rpl_parallel.h" +#include "slave.h" +#include "rpl_mi.h" + + +struct rpl_parallel_thread_pool global_rpl_thread_pool; + + +static void +rpt_handle_event(rpl_parallel_thread::queued_event 
*qev, + THD *thd, + struct rpl_parallel_thread *rpt) +{ + int err; + + /* ToDo: Access to thd, and what about rli, split out a parallel part? */ + err= apply_event_and_update_pos(qev->ev, thd, qev->rli, rpt); + /* ToDo: error handling. */ + /* ToDo: also free qev->ev, or hold on to it for a bit if necessary. */ +} + + +pthread_handler_t +handle_rpl_parallel_thread(void *arg) +{ + THD *thd; + const char* old_msg; + struct rpl_parallel_thread::queued_event *events; + bool group_standalone= true; + bool in_event_group= false; + + struct rpl_parallel_thread *rpt= (struct rpl_parallel_thread *)arg; + + my_thread_init(); + thd = new THD; + thd->thread_stack = (char*)&thd; + mysql_mutex_lock(&LOCK_thread_count); + thd->thread_id= thd->variables.pseudo_thread_id= thread_id++; + threads.append(thd); + mysql_mutex_unlock(&LOCK_thread_count); + set_current_thd(thd); + pthread_detach_this_thread(); + thd->init_for_queries(); + thd->variables.binlog_annotate_row_events= 0; + init_thr_lock(); + thd->store_globals(); + thd->system_thread= SYSTEM_THREAD_SLAVE_SQL; + thd->security_ctx->skip_grants(); + thd->variables.max_allowed_packet= slave_max_allowed_packet; + thd->slave_thread= 1; + thd->enable_slow_log= opt_log_slow_slave_statements; + thd->variables.log_slow_filter= global_system_variables.log_slow_filter; + set_slave_thread_options(thd); + thd->client_capabilities = CLIENT_LOCAL_FILES; + thd_proc_info(thd, "Waiting for work from main SQL threads"); + thd->set_time(); + thd->variables.lock_wait_timeout= LONG_TIMEOUT; + + mysql_mutex_lock(&rpt->LOCK_rpl_thread); + rpt->thd= thd; + + while (rpt->delay_start) + mysql_cond_wait(&rpt->COND_rpl_thread, &rpt->LOCK_rpl_thread); + + rpt->running= true; + + while (!rpt->stop && !thd->killed) + { + rpl_parallel_thread *list; + + old_msg= thd->proc_info; + thd->enter_cond(&rpt->COND_rpl_thread, &rpt->LOCK_rpl_thread, + "Waiting for work from SQL thread"); + while (!rpt->stop && !thd->killed && !(events= rpt->event_queue)) + 
mysql_cond_wait(&rpt->COND_rpl_thread, &rpt->LOCK_rpl_thread); + rpt->free= false; + rpt->event_queue= rpt->last_in_queue= NULL; + thd->exit_cond(old_msg); + + more_events: + while (events) + { + struct rpl_parallel_thread::queued_event *next= events->next; + Log_event_type event_type= events->ev->get_type_code(); + if (event_type == GTID_EVENT) + { + group_standalone= + (0 != (static_cast<Gtid_log_event *>(events->ev)->flags2 & + Gtid_log_event::FL_STANDALONE)); + in_event_group= true; + } + else + { + if (group_standalone) + { + if (!Log_event::is_part_of_group(event_type)) + in_event_group= false; + } + else if (event_type == XID_EVENT) + in_event_group= false; + else if (event_type == QUERY_EVENT) + { + Query_log_event *query= static_cast<Query_log_event *>(events->ev); + if (!strcmp("COMMIT", query->query) || + !strcmp("ROLLBACK", query->query)) + in_event_group= false; + } + } + rpt_handle_event(events, thd, rpt); + free(events); + events= next; + } + + mysql_mutex_lock(&rpt->LOCK_rpl_thread); + if ((events= rpt->event_queue) != NULL) + { + rpt->event_queue= rpt->last_in_queue= NULL; + mysql_mutex_unlock(&rpt->LOCK_rpl_thread); + goto more_events; + } + + if (!in_event_group) + { + rpt->current_entry= NULL; + if (!rpt->free) + { + mysql_mutex_lock(&rpt->pool->LOCK_rpl_thread_pool); + list= rpt->pool->free_list; + rpt->next= list; + rpt->pool->free_list= list; + if (!list) + mysql_cond_signal(&rpt->pool->COND_rpl_thread_pool); + mysql_mutex_unlock(&rpt->pool->LOCK_rpl_thread_pool); + rpt->free= true; + } + } + } + + rpt->running= false; + mysql_mutex_unlock(&rpt->LOCK_rpl_thread); + + return NULL; +} + + +int +rpl_parallel_change_thread_count(rpl_parallel_thread_pool *pool, + uint32 new_count, bool skip_check) +{ + uint32 i; + rpl_parallel_thread **new_list= NULL; + rpl_parallel_thread *new_free_list= NULL; + + /* + Allocate the new list of threads up-front. 
+ That way, if we fail half-way, we only need to free whatever we managed + to allocate, and will not be left with a half-functional thread pool. + */ + if (new_count && + !(new_list= (rpl_parallel_thread **)my_malloc(new_count*sizeof(*new_list), + MYF(MY_WME)))) + { + my_error(ER_OUTOFMEMORY, MYF(0), (int(new_count*sizeof(*new_list)))); + goto err;; + } + + for (i= 0; i < new_count; ++i) + { + pthread_t th; + + if (!(new_list[i]= (rpl_parallel_thread *)my_malloc(sizeof(*(new_list[i])), + MYF(MY_WME)))) + { + my_error(ER_OUTOFMEMORY, MYF(0), sizeof(*(new_list[i]))); + goto err; + } + new_list[i]->delay_start= true; + new_list[i]->running= false; + new_list[i]->stop= false; + new_list[i]->free= false; + mysql_mutex_init(key_LOCK_rpl_thread, &new_list[i]->LOCK_rpl_thread, + MY_MUTEX_INIT_SLOW); + mysql_cond_init(key_COND_rpl_thread, &new_list[i]->COND_rpl_thread, NULL); + new_list[i]->pool= pool; + new_list[i]->current_entry= NULL; + new_list[i]->event_queue= NULL; + new_list[i]->last_in_queue= NULL; + if (mysql_thread_create(key_rpl_parallel_thread, &th, NULL, + handle_rpl_parallel_thread, new_list[i])) + { + my_error(ER_OUT_OF_RESOURCES, MYF(0)); + my_free(new_list[i]); + goto err; + } + new_list[i]->next= new_free_list; + new_free_list= new_list[i]; + } + + if (!skip_check) + { + mysql_mutex_lock(&LOCK_active_mi); + if (master_info_index->give_error_if_slave_running()) + { + mysql_mutex_unlock(&LOCK_active_mi); + goto err; + } + if (pool->changing) + { + mysql_mutex_unlock(&LOCK_active_mi); + my_error(ER_CHANGE_SLAVE_PARALLEL_THREADS_ACTIVE, MYF(0)); + goto err; + } + pool->changing= true; + mysql_mutex_unlock(&LOCK_active_mi); + } + + for (i= 0; i < pool->count; ++i) + { + rpl_parallel_thread *rpt= pool->get_thread(NULL); + rpt->stop= true; + mysql_mutex_unlock(&rpt->LOCK_rpl_thread); + } + + for (i= 0; i < pool->count; ++i) + { + rpl_parallel_thread *rpt= pool->threads[i]; + mysql_mutex_lock(&rpt->LOCK_rpl_thread); + while (rpt->running) + 
mysql_cond_wait(&rpt->COND_rpl_thread, &rpt->LOCK_rpl_thread); + mysql_mutex_unlock(&rpt->LOCK_rpl_thread); + delete rpt; + } + + my_free(pool->threads); + pool->threads= new_list; + pool->free_list= new_free_list; + pool->count= new_count; + for (i= 0; i < pool->count; ++i) + { + mysql_mutex_lock(&pool->threads[i]->LOCK_rpl_thread); + pool->threads[i]->delay_start= false; + mysql_cond_signal(&pool->threads[i]->COND_rpl_thread); + mysql_mutex_unlock(&pool->threads[i]->LOCK_rpl_thread); + } + + if (!skip_check) + { + mysql_mutex_lock(&LOCK_active_mi); + pool->changing= false; + mysql_mutex_unlock(&LOCK_active_mi); + } + return 0; + +err: + if (new_list) + { + while (new_free_list) + { + rpl_parallel_thread *next= new_free_list->next; + mysql_mutex_lock(&new_free_list->LOCK_rpl_thread); + new_free_list->delay_start= false; + new_free_list->stop= true; + while (!new_free_list->running) + mysql_cond_wait(&new_free_list->COND_rpl_thread, + &new_free_list->LOCK_rpl_thread); + while (new_free_list->running) + mysql_cond_wait(&new_free_list->COND_rpl_thread, + &new_free_list->LOCK_rpl_thread); + my_free(new_free_list); + new_free_list= next; + } + my_free(new_list); + } + if (!skip_check) + { + mysql_mutex_lock(&LOCK_active_mi); + pool->changing= false; + mysql_mutex_unlock(&LOCK_active_mi); + } + return 1; +} + + +rpl_parallel_thread_pool::rpl_parallel_thread_pool() + : count(0), threads(0), free_list(0), changing(false), inited(false) +{ +} + + +int +rpl_parallel_thread_pool::init(uint32 size) +{ + count= 0; + threads= NULL; + free_list= NULL; + + mysql_mutex_init(key_LOCK_rpl_thread_pool, &LOCK_rpl_thread_pool, + MY_MUTEX_INIT_SLOW); + mysql_cond_init(key_COND_rpl_thread_pool, &COND_rpl_thread_pool, NULL); + changing= false; + inited= true; + + return rpl_parallel_change_thread_count(this, size, true); +} + + +void +rpl_parallel_thread_pool::destroy() +{ + if (!inited) + return; + rpl_parallel_change_thread_count(this, 0, true); + 
mysql_mutex_destroy(&LOCK_rpl_thread_pool); + mysql_cond_destroy(&COND_rpl_thread_pool); +} + + +struct rpl_parallel_thread * +rpl_parallel_thread_pool::get_thread(rpl_parallel_entry *entry) +{ + rpl_parallel_thread *rpt; + + mysql_mutex_lock(&LOCK_rpl_thread_pool); + while ((rpt= free_list) == NULL) + mysql_cond_wait(&COND_rpl_thread_pool, &LOCK_rpl_thread_pool); + free_list= rpt->next; + mysql_mutex_lock(&rpt->LOCK_rpl_thread); + mysql_mutex_unlock(&LOCK_rpl_thread_pool); + rpt->current_entry= entry; + + return rpt; +} + + +rpl_parallel::rpl_parallel() : + current(NULL) +{ + my_hash_init(&domain_hash, &my_charset_bin, 32, + offsetof(rpl_parallel_entry, domain_id), sizeof(uint32), + NULL, NULL, HASH_UNIQUE); +} + + +rpl_parallel::~rpl_parallel() +{ + my_hash_free(&domain_hash); +} + + +rpl_parallel_entry * +rpl_parallel::find(uint32 domain_id) +{ + struct rpl_parallel_entry *e; + + if (!(e= (rpl_parallel_entry *)my_hash_search(&domain_hash, + (const uchar *)&domain_id, 0))) + { + /* Allocate a new, empty one. 
*/ + if (!(e= (struct rpl_parallel_entry *)my_malloc(sizeof(*e), MYF(0)))) + return NULL; + e->domain_id= domain_id; + e->last_server_id= 0; + e->last_seq_no= 0; + e->last_commit_id= 0; + e->active= false; + e->rpl_thread= NULL; + if (my_hash_insert(&domain_hash, (uchar *)e)) + { + my_free(e); + return NULL; + } + } + + return e; +} + + +bool +rpl_parallel::do_event(Relay_log_info *rli, Log_event *ev, THD *parent_thd) +{ + rpl_parallel_entry *e; + rpl_parallel_thread *cur_thread; + rpl_parallel_thread::queued_event *qev; + + if (!(qev= (rpl_parallel_thread::queued_event *)my_malloc(sizeof(*qev), + MYF(0)))) + { + my_error(ER_OUT_OF_RESOURCES, MYF(0)); + return true; + } + qev->ev= ev; + qev->rli= rli; + qev->next= NULL; + + if (ev->get_type_code() == GTID_EVENT) + { + Gtid_log_event *gtid_ev= static_cast<Gtid_log_event *>(ev); + + if (!(e= find(gtid_ev->domain_id))) + { + my_error(ER_OUT_OF_RESOURCES, MYF(0)); + return true; + } + + /* Check if we already have a worker thread for this entry. */ + cur_thread= e->rpl_thread; + if (cur_thread) + { + mysql_mutex_lock(&cur_thread->LOCK_rpl_thread); + if (cur_thread->current_entry != e) + { + /* Not ours anymore, we need to grab a new one. */ + mysql_mutex_unlock(&cur_thread->LOCK_rpl_thread); + e->rpl_thread= cur_thread= NULL; + } + } + + if (!cur_thread) + { + /* + Nothing else is currently running in this domain. We can spawn a new + thread to do this event group in parallel with anything else that might + be running in other domains. + */ + if (gtid_ev->flags & Gtid_log_event::FL_GROUP_COMMIT_ID) + { + e->last_server_id= gtid_ev->server_id; + e->last_seq_no= gtid_ev->seq_no; + e->last_commit_id= gtid_ev->commit_id; + } + else + { + e->last_server_id= 0; + e->last_seq_no= 0; + e->last_commit_id= 0; + } + cur_thread= e->rpl_thread= global_rpl_thread_pool.get_thread(e); + e->rpl_thread->wait_for= NULL; /* ToDo */ + /* get_thread() returns with the LOCK_rpl_thread locked. 
*/ + } + else if ((gtid_ev->flags & Gtid_log_event::FL_GROUP_COMMIT_ID) && + e->last_commit_id == gtid_ev->commit_id) + { + /* + We are already executing something else in this domain. But the two + event groups were committed together in the same group commit on the + master, so we can still do them in parallel here on the slave. + + However, the commit of this event must wait for the commit of the prior + event, to preserve binlog commit order and visibility across all + servers in the replication hierarchy. + */ + rpl_parallel_thread *rpt= global_rpl_thread_pool.get_thread(e); + rpt->wait_for= cur_thread; /* ToDo */ + mysql_mutex_unlock(&cur_thread->LOCK_rpl_thread); + e->rpl_thread= cur_thread= rpt; + /* get_thread() returns with the LOCK_rpl_thread locked. */ + } + else + { + /* + We are still executing the previous event group for this replication + domain, and we have to wait for that to finish before we can start on + the next one. So just re-use the thread. + */ + } + + current= e; + } + else + { + if (!current) + { + /* We have no domain_id yet, just run non-parallel. */ + rpt_handle_event(qev, parent_thd, NULL); + return false; + } + cur_thread= current->rpl_thread; + if (cur_thread) + { + mysql_mutex_lock(&cur_thread->LOCK_rpl_thread); + if (cur_thread->current_entry != current) + { + /* Not ours anymore, we need to grab a new one. */ + mysql_mutex_unlock(&cur_thread->LOCK_rpl_thread); + cur_thread= NULL; + } + } + if (!cur_thread) + { + cur_thread= current->rpl_thread= + global_rpl_thread_pool.get_thread(current); + cur_thread->wait_for= NULL; /* ToDo */ + } + } + /* + Queue the event for processing. 
+ */ + if (cur_thread->last_in_queue) + cur_thread->last_in_queue->next= qev; + else + cur_thread->event_queue= qev; + cur_thread->last_in_queue= qev; + mysql_cond_signal(&cur_thread->COND_rpl_thread); + mysql_mutex_unlock(&cur_thread->LOCK_rpl_thread); + + return false; +} diff --git a/sql/rpl_parallel.h b/sql/rpl_parallel.h new file mode 100644 index 00000000000..7e966f1615c --- /dev/null +++ b/sql/rpl_parallel.h @@ -0,0 +1,74 @@ +#ifndef RPL_PARALLEL_H +#define RPL_PARALLEL_H + +#include "log_event.h" + + +struct rpl_parallel; +struct rpl_parallel_entry; +struct rpl_parallel_thread_pool; + +class Relay_log_info; +struct rpl_parallel_thread { + bool delay_start; + bool running; + bool stop; + bool free; + mysql_mutex_t LOCK_rpl_thread; + mysql_cond_t COND_rpl_thread; + struct rpl_parallel_thread *next; /* For free list. */ + struct rpl_parallel_thread_pool *pool; + THD *thd; + struct rpl_parallel_entry *current_entry; + struct queued_event { + queued_event *next; + Log_event *ev; + Relay_log_info *rli; + } *event_queue, *last_in_queue; + rpl_parallel_thread *wait_for; /* ToDo: change this ... 
*/ +}; + + +struct rpl_parallel_thread_pool { + uint32 count; + struct rpl_parallel_thread **threads; + struct rpl_parallel_thread *free_list; + mysql_mutex_t LOCK_rpl_thread_pool; + mysql_cond_t COND_rpl_thread_pool; + bool changing; + bool inited; + + rpl_parallel_thread_pool(); + int init(uint32 size); + void destroy(); + struct rpl_parallel_thread *get_thread(rpl_parallel_entry *entry); +}; + + +struct rpl_parallel_entry { + uint32 domain_id; + uint32 last_server_id; + uint64 last_seq_no; + uint64 last_commit_id; + bool active; + rpl_parallel_thread *rpl_thread; +}; +struct rpl_parallel { + HASH domain_hash; + rpl_parallel_entry *current; + + rpl_parallel(); + ~rpl_parallel(); + rpl_parallel_entry *find(uint32 domain_id); + bool do_event(Relay_log_info *rli, Log_event *ev, THD *thd); +}; + + +extern struct rpl_parallel_thread_pool global_rpl_thread_pool; + + +extern int rpl_parallel_change_thread_count(rpl_parallel_thread_pool *pool, + uint32 new_count, + bool skip_check= false); + +#endif /* RPL_PARALLEL_H */ diff --git a/sql/rpl_rli.h b/sql/rpl_rli.h index 6dd757343fd..452457e9e5a 100644 --- a/sql/rpl_rli.h +++ b/sql/rpl_rli.h @@ -22,6 +22,7 @@ #include "log.h" /* LOG_INFO, MYSQL_BIN_LOG */ #include "sql_class.h" /* THD */ #include "log_event.h" +#include "rpl_parallel.h" struct RPL_TABLE_LIST; class Master_info; @@ -318,6 +319,7 @@ public: */ uint64 gtid_sub_id; rpl_gtid current_gtid; + rpl_parallel parallel; Relay_log_info(bool is_slave_recovery); ~Relay_log_info(); diff --git a/sql/share/errmsg-utf8.txt b/sql/share/errmsg-utf8.txt index 48809417a6c..85baddd3c49 100644 --- a/sql/share/errmsg-utf8.txt +++ b/sql/share/errmsg-utf8.txt @@ -6555,3 +6555,5 @@ ER_INSIDE_TRANSACTION_PREVENTS_SWITCH_GTID_DOMAIN_ID_SEQ_NO eng "Cannot modify @@session.gtid_domain_id or @@session.gtid_seq_no inside a transaction" ER_STORED_FUNCTION_PREVENTS_SWITCH_GTID_DOMAIN_ID_SEQ_NO eng "Cannot modify @@session.gtid_domain_id or @@session.gtid_seq_no inside a stored function or 
trigger" +ER_CHANGE_SLAVE_PARALLEL_THREADS_ACTIVE + eng "Cannot change @@slave_parallel_threads while another change is in progress" diff --git a/sql/slave.cc b/sql/slave.cc index 1734b2c4f76..419fa579a09 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -57,6 +57,8 @@ #include "rpl_tblmap.h" #include "debug_sync.h" +#include "rpl_parallel.h" + #define FLAGSTR(V,F) ((V)&(F)?#F" ":"") @@ -360,6 +362,9 @@ int init_slave() goto err; } + if (global_rpl_thread_pool.init(opt_slave_parallel_threads)) + return 1; + /* If --slave-skip-errors=... was not used, the string value for the system variable has not been set up yet. Do it now. @@ -947,6 +952,7 @@ void end_slave() master_info_index= 0; active_mi= 0; mysql_mutex_unlock(&LOCK_active_mi); + global_rpl_thread_pool.destroy(); free_all_rpl_filters(); DBUG_VOID_RETURN; } @@ -3012,7 +3018,8 @@ static int has_temporary_error(THD *thd) @retval 2 No error calling ev->apply_event(), but error calling ev->update_pos(). */ -int apply_event_and_update_pos(Log_event* ev, THD* thd, Relay_log_info* rli) +int apply_event_and_update_pos(Log_event* ev, THD* thd, Relay_log_info* rli, + rpl_parallel_thread *rpt) { int exec_res= 0; @@ -3234,7 +3241,10 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli) };); } - exec_res= apply_event_and_update_pos(ev, thd, rli); + if (opt_slave_parallel_threads > 0) + DBUG_RETURN(rli->parallel.do_event(rli, ev, thd)); + + exec_res= apply_event_and_update_pos(ev, thd, rli, NULL); switch (ev->get_type_code()) { case FORMAT_DESCRIPTION_EVENT: diff --git a/sql/slave.h b/sql/slave.h index 565f40b7236..69b0e011a39 100644 --- a/sql/slave.h +++ b/sql/slave.h @@ -51,6 +51,7 @@ class Relay_log_info; class Master_info; class Master_info_index; +struct rpl_parallel_thread; int init_intvar_from_file(int* var, IO_CACHE* f, int default_val); int init_strvar_from_file(char *var, int max_size, IO_CACHE *f, @@ -227,7 +228,8 @@ int purge_relay_logs(Relay_log_info* rli, THD *thd, bool just_reset, void 
set_slave_thread_options(THD* thd); void set_slave_thread_default_charset(THD *thd, Relay_log_info const *rli); int rotate_relay_log(Master_info* mi); -int apply_event_and_update_pos(Log_event* ev, THD* thd, Relay_log_info* rli); +int apply_event_and_update_pos(Log_event* ev, THD* thd, Relay_log_info* rli, + rpl_parallel_thread *rpt); pthread_handler_t handle_slave_io(void *arg); pthread_handler_t handle_slave_sql(void *arg); diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc index 4066a04aea7..f63960a4e36 100644 --- a/sql/sys_vars.cc +++ b/sql/sys_vars.cc @@ -57,6 +57,7 @@ #include "threadpool.h" #include "sql_repl.h" #include "opt_range.h" +#include "rpl_parallel.h" /* The rule for this file: everything should be 'static'. When a sys_var @@ -1434,6 +1435,51 @@ static Sys_var_mybool Sys_gtid_strict_mode( "generate an out-of-order binlog if executed.", GLOBAL_VAR(opt_gtid_strict_mode), CMD_LINE(OPT_ARG), DEFAULT(FALSE)); + + +static bool +check_slave_parallel_threads(sys_var *self, THD *thd, set_var *var) +{ + bool running; + + mysql_mutex_unlock(&LOCK_global_system_variables); + mysql_mutex_lock(&LOCK_active_mi); + running= master_info_index->give_error_if_slave_running(); + mysql_mutex_unlock(&LOCK_active_mi); + mysql_mutex_lock(&LOCK_global_system_variables); + if (running) + return true; + + return false; +} + +static bool +fix_slave_parallel_threads(sys_var *self, THD *thd, enum_var_type type) +{ + bool running; + + mysql_mutex_unlock(&LOCK_global_system_variables); + mysql_mutex_lock(&LOCK_active_mi); + running= master_info_index->give_error_if_slave_running(); + mysql_mutex_unlock(&LOCK_active_mi); + mysql_mutex_lock(&LOCK_global_system_variables); + if (running || rpl_parallel_change_thread_count(&global_rpl_thread_pool, + opt_slave_parallel_threads)) + return true; + + return false; +} + + +static Sys_var_ulong Sys_slave_parallel_threads( + "slave_parallel_threads", + "If non-zero, number of threads to spawn to apply in parallel events " + "on the slave that 
were group-committed on the master or were logged " + "with GTID in different replication domains.", + GLOBAL_VAR(opt_slave_parallel_threads), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0,16383), DEFAULT(0), BLOCK_SIZE(1), NO_MUTEX_GUARD, + NOT_IN_BINLOG, ON_CHECK(check_slave_parallel_threads), + ON_UPDATE(fix_slave_parallel_threads)); #endif diff --git a/storage/federatedx/ha_federatedx.cc b/storage/federatedx/ha_federatedx.cc index a7f2d887952..1c9db78da4f 100644 --- a/storage/federatedx/ha_federatedx.cc +++ b/storage/federatedx/ha_federatedx.cc @@ -539,7 +539,6 @@ static int parse_url_error(FEDERATEDX_SHARE *share, TABLE_SHARE *table_s, int get_connection(MEM_ROOT *mem_root, FEDERATEDX_SHARE *share) { int error_num= ER_FOREIGN_SERVER_DOESNT_EXIST; - char error_buffer[FEDERATEDX_QUERY_BUFFER_SIZE]; FOREIGN_SERVER *server, server_buffer; DBUG_ENTER("ha_federatedx::get_connection"); diff --git a/storage/sphinx/snippets_udf.cc b/storage/sphinx/snippets_udf.cc index 5318592ab5f..c83cba43821 100644 --- a/storage/sphinx/snippets_udf.cc +++ b/storage/sphinx/snippets_udf.cc @@ -244,7 +244,7 @@ struct CSphUrl char * m_sBuffer; char * m_sFormatted; - char * m_sScheme; + const char * m_sScheme; char * m_sHost; char * m_sIndex; @@ -254,8 +254,8 @@ struct CSphUrl : m_sBuffer ( NULL ) , m_sFormatted ( NULL ) , m_sScheme ( SPHINXSE_DEFAULT_SCHEME ) - , m_sHost ( SPHINXSE_DEFAULT_HOST ) - , m_sIndex ( SPHINXSE_DEFAULT_INDEX ) + , m_sHost ( const_cast(SPHINXSE_DEFAULT_HOST) ) + , m_sIndex ( const_cast(SPHINXSE_DEFAULT_INDEX) ) , m_iPort ( SPHINXSE_DEFAULT_PORT ) {} @@ -311,12 +311,12 @@ bool CSphUrl::Parse ( const char * sUrl, int iLen ) // unix-domain socket m_iPort = 0; if (!( m_sIndex = strrchr ( m_sHost, ':' ) )) - m_sIndex = SPHINXSE_DEFAULT_INDEX; + m_sIndex = const_cast(SPHINXSE_DEFAULT_INDEX); else { *m_sIndex++ = '\0'; if ( !*m_sIndex ) - m_sIndex = SPHINXSE_DEFAULT_INDEX; + m_sIndex = const_cast(SPHINXSE_DEFAULT_INDEX); } bOk = true; break; @@ -336,7 +336,7 @@ bool 
CSphUrl::Parse ( const char * sUrl, int iLen ) if ( m_sIndex ) *m_sIndex++ = '\0'; else - m_sIndex = SPHINXSE_DEFAULT_INDEX; + m_sIndex = const_cast(SPHINXSE_DEFAULT_INDEX); m_iPort = atoi(sPort); if ( !m_iPort ) @@ -348,7 +348,7 @@ bool CSphUrl::Parse ( const char * sUrl, int iLen ) if ( m_sIndex ) *m_sIndex++ = '\0'; else - m_sIndex = SPHINXSE_DEFAULT_INDEX; + m_sIndex = const_cast(SPHINXSE_DEFAULT_INDEX); } bOk = true; @@ -446,7 +446,7 @@ int CSphUrl::Connect() uint uServerVersion; uint uClientVersion = htonl ( SPHINX_SEARCHD_PROTO ); int iSocket = -1; - char * pError = NULL; + const char * pError = NULL; do { iSocket = socket ( iDomain, SOCK_STREAM, 0 ); From 6d1e55f51867fe6cbd73a824b576eee2d349ad43 Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 25 Jun 2013 09:30:19 +0200 Subject: [PATCH 02/41] MDEV-4506: Parallel replication: Intermediate commit. A few fixes following tests. Now can apply one INSERT event in a separate worker thread. --- mysql-test/suite/rpl/t/rpl_parallel.test | 44 ++++++++++++++++++++++++ sql/rpl_parallel.cc | 14 ++++++-- sql/slave.cc | 3 +- 3 files changed, 57 insertions(+), 4 deletions(-) create mode 100644 mysql-test/suite/rpl/t/rpl_parallel.test diff --git a/mysql-test/suite/rpl/t/rpl_parallel.test b/mysql-test/suite/rpl/t/rpl_parallel.test new file mode 100644 index 00000000000..5a58b9d0f50 --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_parallel.test @@ -0,0 +1,44 @@ +--source include/have_binlog_format_statement.inc + +connect (s1,127.0.0.1,root,,test,$MASTER_MYPORT,); +connect (s2,127.0.0.1,root,,test,$SLAVE_MYPORT,); + +--connection s1 +SELECT @@server_id; +SET sql_log_bin=0; +CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=MyISAM; +SET sql_log_bin=1; + +--connection s2 +SELECT @@server_id; +SET sql_log_bin=0; +CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=MyISAM; +SET sql_log_bin=1; + +--replace_result $MASTER_MYPORT MASTER_PORT +eval CHANGE MASTER TO master_host = '127.0.0.1', master_port = $MASTER_MYPORT, + master_user='root', 
master_use_gtid=current_pos; + +--connection s1 +INSERT INTO t1 VALUES (1); + +--connection s2 +query_vertical SHOW SLAVE STATUS; + +--source include/start_slave.inc +SELECT * FROM t1; +--sleep 1 +SELECT * FROM t1; + +--source include/stop_slave.inc + +--connection s1 +SET sql_log_bin=0; +DROP TABLE t1; +SET sql_log_bin=1; + +--connection s2 +RESET SLAVE ALL; +SET sql_log_bin=0; +DROP TABLE t1; +SET sql_log_bin=1; diff --git a/sql/rpl_parallel.cc b/sql/rpl_parallel.cc index a4a87c1e92e..65ae2b87179 100644 --- a/sql/rpl_parallel.cc +++ b/sql/rpl_parallel.cc @@ -13,9 +13,13 @@ rpt_handle_event(rpl_parallel_thread::queued_event *qev, struct rpl_parallel_thread *rpt) { int err; + Relay_log_info *rli= qev->rli; + thd->rli_slave= rli; + thd->rpl_filter = rli->mi->rpl_filter; /* ToDo: Access to thd, and what about rli, split out a parallel part? */ - err= apply_event_and_update_pos(qev->ev, thd, qev->rli, rpt); + mysql_mutex_lock(&rli->data_lock); + err= apply_event_and_update_pos(qev->ev, thd, rli, rpt); /* ToDo: error handling. */ /* ToDo: also free qev->ev, or hold on to it for a bit if necessary. 
*/ } @@ -108,7 +112,7 @@ handle_rpl_parallel_thread(void *arg) } } rpt_handle_event(events, thd, rpt); - free(events); + my_free(events); events= next; } @@ -313,6 +317,7 @@ rpl_parallel_thread_pool::destroy() rpl_parallel_change_thread_count(this, 0, true); mysql_mutex_destroy(&LOCK_rpl_thread_pool); mysql_cond_destroy(&COND_rpl_thread_pool); + inited= false; } @@ -325,8 +330,8 @@ rpl_parallel_thread_pool::get_thread(rpl_parallel_entry *entry) while ((rpt= free_list) == NULL) mysql_cond_wait(&COND_rpl_thread_pool, &LOCK_rpl_thread_pool); free_list= rpt->next; - mysql_mutex_lock(&rpt->LOCK_rpl_thread); mysql_mutex_unlock(&LOCK_rpl_thread_pool); + mysql_mutex_lock(&rpt->LOCK_rpl_thread); rpt->current_entry= entry; return rpt; @@ -383,6 +388,9 @@ rpl_parallel::do_event(Relay_log_info *rli, Log_event *ev, THD *parent_thd) rpl_parallel_thread *cur_thread; rpl_parallel_thread::queued_event *qev; + /* ToDo: what to do with this lock?!? */ + mysql_mutex_unlock(&rli->data_lock); + if (!(qev= (rpl_parallel_thread::queued_event *)my_malloc(sizeof(*qev), MYF(0)))) { diff --git a/sql/slave.cc b/sql/slave.cc index 419fa579a09..d7e4d9a25ed 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -5800,7 +5800,8 @@ static Log_event* next_event(Relay_log_info* rli) llstr(my_b_tell(cur_log),llbuf1), llstr(rli->event_relay_log_pos,llbuf2))); DBUG_ASSERT(my_b_tell(cur_log) >= BIN_LOG_HEADER_SIZE); - DBUG_ASSERT(my_b_tell(cur_log) == rli->event_relay_log_pos); + DBUG_ASSERT(opt_slave_parallel_threads > 0 || + my_b_tell(cur_log) == rli->event_relay_log_pos); } #endif /* From 535de71728ff92747b46e985b339d23b4587a9c4 Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 25 Jun 2013 15:48:01 +0200 Subject: [PATCH 03/41] MDEV-4506: Parallel replication: intermediate commit. Fix typo in worker thread free list management. Simple parallel INSERT from worker threads runs now. 
--- mysql-test/suite/rpl/t/rpl_parallel.test | 17 +++++++++++++++++ sql/rpl_parallel.cc | 4 ++-- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/mysql-test/suite/rpl/t/rpl_parallel.test b/mysql-test/suite/rpl/t/rpl_parallel.test index 5a58b9d0f50..5748218dc10 100644 --- a/mysql-test/suite/rpl/t/rpl_parallel.test +++ b/mysql-test/suite/rpl/t/rpl_parallel.test @@ -20,7 +20,24 @@ eval CHANGE MASTER TO master_host = '127.0.0.1', master_port = $MASTER_MYPORT, master_user='root', master_use_gtid=current_pos; --connection s1 +SET gtid_domain_id=0; INSERT INTO t1 VALUES (1); +SET gtid_domain_id=1; +INSERT INTO t1 VALUES (2); +SET gtid_domain_id=2; +INSERT INTO t1 VALUES (3); +SET gtid_domain_id=0; +INSERT INTO t1 VALUES (4); +SET gtid_domain_id=1; +INSERT INTO t1 VALUES (5); +SET gtid_domain_id=2; +INSERT INTO t1 VALUES (6); +SET gtid_domain_id=0; +INSERT INTO t1 VALUES (7); +SET gtid_domain_id=1; +INSERT INTO t1 VALUES (8); +SET gtid_domain_id=2; +INSERT INTO t1 VALUES (9); --connection s2 query_vertical SHOW SLAVE STATUS; diff --git a/sql/rpl_parallel.cc b/sql/rpl_parallel.cc index 65ae2b87179..f1ac7e83071 100644 --- a/sql/rpl_parallel.cc +++ b/sql/rpl_parallel.cc @@ -132,9 +132,9 @@ handle_rpl_parallel_thread(void *arg) mysql_mutex_lock(&rpt->pool->LOCK_rpl_thread_pool); list= rpt->pool->free_list; rpt->next= list; - rpt->pool->free_list= list; + rpt->pool->free_list= rpt; if (!list) - mysql_cond_signal(&rpt->pool->COND_rpl_thread_pool); + mysql_cond_broadcast(&rpt->pool->COND_rpl_thread_pool); mysql_mutex_unlock(&rpt->pool->LOCK_rpl_thread_pool); rpt->free= true; } From 7e5dc4f074b7d1cee4721e6fa49d6e5628ef793f Mon Sep 17 00:00:00 2001 From: unknown Date: Wed, 26 Jun 2013 12:10:35 +0200 Subject: [PATCH 04/41] MDEV-4506: Parallel replication. Intermediate commit. Implement facility for the commit in one thread to wait for the commit of another to complete first. 
The wait is done in a way that does not hinder that a waiter and a waitee can group commit together with a single fsync() in both binlog and InnoDB. The wait is done efficiently with respect to locking. The patch was originally made to support TaoBao parallel replication with in-order commit; now it will be adapted to also be used for parallel replication of group-committed transactions. A waiter THD registers itself with a prior waitee THD. The waiter will then complete its commit at the earliest in the same group commit of the waitee (when using binlog). The wait can also be done explicitly by the waitee. --- include/mysql/plugin.h | 35 +++++ include/mysql/plugin_audit.h.pp | 1 + include/mysql/plugin_auth.h.pp | 1 + include/mysql/plugin_ftparser.h.pp | 1 + sql/handler.cc | 8 + sql/log.cc | 193 ++++++++++++++++++++++-- sql/log.h | 13 ++ sql/mysqld.cc | 7 +- sql/mysqld.h | 5 +- sql/sql_class.cc | 208 ++++++++++++++++++++++++++ sql/sql_class.h | 122 +++++++++++++++ storage/innobase/handler/ha_innodb.cc | 5 + storage/xtradb/handler/ha_innodb.cc | 5 + 13 files changed, 586 insertions(+), 18 deletions(-) diff --git a/include/mysql/plugin.h b/include/mysql/plugin.h index 38573180232..ab72a9d106b 100644 --- a/include/mysql/plugin.h +++ b/include/mysql/plugin.h @@ -683,6 +683,41 @@ void *thd_get_ha_data(const MYSQL_THD thd, const struct handlerton *hton); */ void thd_set_ha_data(MYSQL_THD thd, const struct handlerton *hton, const void *ha_data); + + +/** + Signal that the first part of handler commit is finished, and that the + committed transaction is now visible and has fixed commit ordering with + respect to other transactions. The commit need _not_ be durable yet, and + typically will not be when this call makes sense. + + This call is optional, if the storage engine does not call it the upper + layer will after the handler commit() method is done. However, the storage + engine may choose to call it itself to increase the possibility for group + commit. 
+ + In-order parallel replication uses this to apply different transactions in + parallel, but delay the commits of later transactions until earlier + transactions have committed first, thus achieving increased performance on + multi-core systems while still preserving full transaction consistency. + + The storage engine can call this from within the commit() method, typically + after the commit record has been written to the transaction log, but before + the log has been fsync()'ed. This will allow the next replicated transaction + to proceed to commit before the first one has done fsync() or similar. Thus, + it becomes possible for multiple sequential replicated transactions to share + a single fsync() inside the engine in group commit. + + Note that this method should _not_ be called from within the commit_ordered() + method, or any other place in the storage engine. When commit_ordered() is + used (typically when binlog is enabled), the transaction coordinator takes + care of this and makes group commit in the storage engine possible without + any other action needed on the part of the storage engine. This function + thd_wakeup_subsequent_commits() is only needed when no transaction + coordinator is used, meaning a single storage engine and no binary log.
+*/ +void thd_wakeup_subsequent_commits(MYSQL_THD thd); + #ifdef __cplusplus } #endif diff --git a/include/mysql/plugin_audit.h.pp b/include/mysql/plugin_audit.h.pp index d630359f5fe..564dd6272f5 100644 --- a/include/mysql/plugin_audit.h.pp +++ b/include/mysql/plugin_audit.h.pp @@ -236,6 +236,7 @@ void mysql_query_cache_invalidate4(void* thd, void *thd_get_ha_data(const void* thd, const struct handlerton *hton); void thd_set_ha_data(void* thd, const struct handlerton *hton, const void *ha_data); +void thd_wakeup_subsequent_commits(void* thd); struct mysql_event_general { unsigned int event_subclass; diff --git a/include/mysql/plugin_auth.h.pp b/include/mysql/plugin_auth.h.pp index 6a877980c25..edfd7095203 100644 --- a/include/mysql/plugin_auth.h.pp +++ b/include/mysql/plugin_auth.h.pp @@ -236,6 +236,7 @@ void mysql_query_cache_invalidate4(void* thd, void *thd_get_ha_data(const void* thd, const struct handlerton *hton); void thd_set_ha_data(void* thd, const struct handlerton *hton, const void *ha_data); +void thd_wakeup_subsequent_commits(void* thd); #include typedef struct st_plugin_vio_info { diff --git a/include/mysql/plugin_ftparser.h.pp b/include/mysql/plugin_ftparser.h.pp index ab15c9d176d..0cc51e259dc 100644 --- a/include/mysql/plugin_ftparser.h.pp +++ b/include/mysql/plugin_ftparser.h.pp @@ -189,6 +189,7 @@ void mysql_query_cache_invalidate4(void* thd, void *thd_get_ha_data(const void* thd, const struct handlerton *hton); void thd_set_ha_data(void* thd, const struct handlerton *hton, const void *ha_data); +void thd_wakeup_subsequent_commits(void* thd); enum enum_ftparser_mode { MYSQL_FTPARSER_SIMPLE_MODE= 0, diff --git a/sql/handler.cc b/sql/handler.cc index 660697cd74b..25b2ee13187 100644 --- a/sql/handler.cc +++ b/sql/handler.cc @@ -1455,6 +1455,8 @@ int ha_commit_one_phase(THD *thd, bool all) */ bool is_real_trans=all || thd->transaction.all.ha_list == 0; DBUG_ENTER("ha_commit_one_phase"); + if (is_real_trans) + thd->wait_for_prior_commit(); int res= 
commit_one_phase_2(thd, all, trans, is_real_trans); DBUG_RETURN(res); } @@ -1494,7 +1496,10 @@ commit_one_phase_2(THD *thd, bool all, THD_TRANS *trans, bool is_real_trans) } /* Free resources and perform other cleanup even for 'empty' transactions. */ if (is_real_trans) + { + thd->wakeup_subsequent_commits(); thd->transaction.cleanup(); + } DBUG_RETURN(error); } @@ -1569,7 +1574,10 @@ int ha_rollback_trans(THD *thd, bool all) } /* Always cleanup. Even if nht==0. There may be savepoints. */ if (is_real_trans) + { + thd->wakeup_subsequent_commits(); thd->transaction.cleanup(); + } if (all) thd->transaction_rollback_request= FALSE; diff --git a/sql/log.cc b/sql/log.cc index d312f4bc936..e3eb5f9a331 100644 --- a/sql/log.cc +++ b/sql/log.cc @@ -6542,44 +6542,199 @@ MYSQL_BIN_LOG::write_transaction_to_binlog(THD *thd, } bool -MYSQL_BIN_LOG::write_transaction_to_binlog_events(group_commit_entry *entry) +MYSQL_BIN_LOG::queue_for_group_commit(group_commit_entry *entry, + wait_for_commit *wfc) { + group_commit_entry *orig_queue; + wait_for_commit *list, *cur, *last; + /* To facilitate group commit for the binlog, we first queue up ourselves in the group commit queue. Then the first thread to enter the queue waits for the LOCK_log mutex, and commits for everyone in the queue once it gets the lock. Any other threads in the queue just wait for the first one to finish the commit and wake them up. + + To support in-order parallel replication with group commit, after we add + some transaction to the queue, we check if there were other transactions + already prepared to commit but just waiting for the first one to commit. + If so, we add those to the queue as well, transitively for all waiters. 
*/ entry->thd->clear_wakeup_ready(); mysql_mutex_lock(&LOCK_prepare_ordered); - group_commit_entry *orig_queue= group_commit_queue; - entry->next= orig_queue; - group_commit_queue= entry; + orig_queue= group_commit_queue; - if (entry->cache_mngr->using_xa) + /* + Iteratively process everything added to the queue, looking for waiters, + and their waiters, and so on. If a waiter is ready to commit, we + immediately add it to the queue; if not we just wake it up. + + This would be natural to do with recursion, but we want to avoid + potentially unbounded recursion blowing the C stack, so we use the list + approach instead. + */ + list= wfc; + cur= list; + last= list; + for (;;) { - DEBUG_SYNC(entry->thd, "commit_before_prepare_ordered"); - run_prepare_ordered(entry->thd, entry->all); - DEBUG_SYNC(entry->thd, "commit_after_prepare_ordered"); + /* Add the entry to the group commit queue. */ + entry->next= group_commit_queue; + group_commit_queue= entry; + + if (entry->cache_mngr->using_xa) + { + DEBUG_SYNC(entry->thd, "commit_before_prepare_ordered"); + run_prepare_ordered(entry->thd, entry->all); + DEBUG_SYNC(entry->thd, "commit_after_prepare_ordered"); + } + + if (!cur) + break; // Can happen if initial entry has no wait_for_commit + + if (cur->subsequent_commits_list) + { + bool have_lock; + wait_for_commit *waiter; + + mysql_mutex_lock(&cur->LOCK_wait_commit); + have_lock= true; + waiter= cur->subsequent_commits_list; + /* Check again, now safely under lock. */ + if (waiter) + { + /* Grab the list of waiters and process it. */ + cur->subsequent_commits_list= NULL; + do + { + wait_for_commit *next= waiter->next_subsequent_commit; + group_commit_entry *entry2= + (group_commit_entry *)waiter->opaque_pointer; + if (entry2) + { + /* + This is another transaction ready to be written to the binary + log. We can put it into the queue directly, without needing a + separate context switch to the other thread. 
We just set a flag + so that the other thread will know when it wakes up that it was + already processed. + + So put it at the end of the list to be processed in a subsequent + iteration of the outer loop. + */ + entry2->queued_by_other= true; + last->next_subsequent_commit= waiter; + last= waiter; + /* + As a small optimisation, we do not actually need to set + waiter->next_subsequent_commit to NULL, as we can use the + pointer `last' to check for end-of-list. + */ + } + else + { + /* + Wake up the waiting transaction. + + For this, we need to set the "wakeup running" flag and release + the waitee lock to avoid a deadlock, see comments on + THD::wakeup_subsequent_commits2() for details. + */ + if (have_lock) + { + cur->wakeup_subsequent_commits_running= true; + mysql_mutex_unlock(&cur->LOCK_wait_commit); + have_lock= false; + } + waiter->wakeup(); + } + waiter= next; + } while (waiter); + } + if (have_lock) + mysql_mutex_unlock(&cur->LOCK_wait_commit); + } + if (cur == last) + break; + cur= cur->next_subsequent_commit; + entry= (group_commit_entry *)cur->opaque_pointer; + DBUG_ASSERT(entry != NULL); } + + /* Now we need to clear the wakeup_subsequent_commits_running flags. */ + if (list) + { + for (;;) + { + if (list->wakeup_subsequent_commits_running) + { + mysql_mutex_lock(&list->LOCK_wait_commit); + list->wakeup_subsequent_commits_running= false; + mysql_mutex_unlock(&list->LOCK_wait_commit); + } + if (list == last) + break; + list= list->next_subsequent_commit; + } + } + mysql_mutex_unlock(&LOCK_prepare_ordered); DEBUG_SYNC(entry->thd, "commit_after_release_LOCK_prepare_ordered"); + return orig_queue == NULL; +} + +bool +MYSQL_BIN_LOG::write_transaction_to_binlog_events(group_commit_entry *entry) +{ + wait_for_commit *wfc; + bool is_leader; + + wfc= entry->thd->wait_for_commit_ptr; + entry->queued_by_other= false; + if (wfc && wfc->waiting_for_commit) + { + mysql_mutex_lock(&wfc->LOCK_wait_commit); + /* Do an extra check here, this time safely under lock. 
*/ + if (wfc->waiting_for_commit) + { + wfc->opaque_pointer= entry; + do + { + mysql_cond_wait(&wfc->COND_wait_commit, &wfc->LOCK_wait_commit); + } while (wfc->waiting_for_commit); + wfc->opaque_pointer= NULL; + } + mysql_mutex_unlock(&wfc->LOCK_wait_commit); + } + + if (entry->queued_by_other) + is_leader= false; + else + is_leader= queue_for_group_commit(entry, wfc); + /* - The first in the queue handle group commit for all; the others just wait + The first in the queue handles group commit for all; the others just wait to be signalled when group commit is done. */ - if (orig_queue != NULL) + if (is_leader) + trx_group_commit_leader(entry); + else if (!entry->queued_by_other) entry->thd->wait_for_wakeup_ready(); else - trx_group_commit_leader(entry); + { + /* + If we were queued by another prior commit, then we are woken up + only when the leader has already completed the commit for us. + So nothing to do here then. + */ + } if (!opt_optimize_thread_scheduling) { /* For the leader, trx_group_commit_leader() already took the lock. 
*/ - if (orig_queue != NULL) + if (!is_leader) mysql_mutex_lock(&LOCK_commit_ordered); DEBUG_SYNC(entry->thd, "commit_loop_entry_commit_ordered"); @@ -6598,7 +6753,10 @@ MYSQL_BIN_LOG::write_transaction_to_binlog_events(group_commit_entry *entry) if (next) { - next->thd->signal_wakeup_ready(); + if (next->queued_by_other) + next->thd->wait_for_commit_ptr->wakeup(); + else + next->thd->signal_wakeup_ready(); } else { @@ -6884,7 +7042,12 @@ MYSQL_BIN_LOG::trx_group_commit_leader(group_commit_entry *leader) */ next= current->next; if (current != leader) // Don't wake up ourself - current->thd->signal_wakeup_ready(); + { + if (current->queued_by_other) + current->thd->wait_for_commit_ptr->wakeup(); + else + current->thd->signal_wakeup_ready(); + } current= next; } DEBUG_SYNC(leader->thd, "commit_after_group_run_commit_ordered"); @@ -7514,6 +7677,8 @@ int TC_LOG_MMAP::log_and_order(THD *thd, my_xid xid, bool all, mysql_mutex_unlock(&LOCK_prepare_ordered); } + thd->wait_for_prior_commit(); + cookie= 0; if (xid) cookie= log_one_transaction(xid); diff --git a/sql/log.h b/sql/log.h index 0b1344aa523..2345f0acf9c 100644 --- a/sql/log.h +++ b/sql/log.h @@ -45,6 +45,15 @@ class TC_LOG virtual int open(const char *opt_name)=0; virtual void close()=0; + /* + Transaction coordinator 2-phase commit. + + Must invoke the run_prepare_ordered and run_commit_ordered methods, as + described below for these methods. + + In addition, must invoke THD::wait_for_prior_commit(), or equivalent + wait, to ensure that one commit waits for another if registered to do so. + */ virtual int log_and_order(THD *thd, my_xid xid, bool all, bool need_prepare_ordered, bool need_commit_ordered) = 0; @@ -397,6 +406,7 @@ private: class binlog_cache_mngr; struct rpl_gtid; +class wait_for_commit; class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG { private: @@ -445,6 +455,8 @@ class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG group commit, only used when opt_optimize_thread_scheduling is not set. 
*/ bool check_purge; + /* Flag used to optimise around wait_for_prior_commit. */ + bool queued_by_other; ulong binlog_id; }; @@ -526,6 +538,7 @@ class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG void do_checkpoint_request(ulong binlog_id); void purge(); int write_transaction_or_stmt(group_commit_entry *entry, uint64 commit_id); + bool queue_for_group_commit(group_commit_entry *entry, wait_for_commit *wfc); bool write_transaction_to_binlog_events(group_commit_entry *entry); void trx_group_commit_leader(group_commit_entry *leader); bool is_xidlist_idle_nolock(); diff --git a/sql/mysqld.cc b/sql/mysqld.cc index 4e2679f1c91..bbb7c0d67bf 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -777,7 +777,7 @@ PSI_mutex_key key_LOCK_slave_state, key_LOCK_binlog_state, PSI_mutex_key key_LOCK_stats, key_LOCK_global_user_client_stats, key_LOCK_global_table_stats, key_LOCK_global_index_stats, - key_LOCK_wakeup_ready; + key_LOCK_wakeup_ready, key_LOCK_wait_commit; PSI_mutex_key key_LOCK_rpl_gtid_state; @@ -825,6 +825,7 @@ static PSI_mutex_info all_server_mutexes[]= { &key_LOCK_global_index_stats, "LOCK_global_index_stats", PSI_FLAG_GLOBAL}, { &key_LOCK_wakeup_ready, "THD::LOCK_wakeup_ready", 0}, { &key_LOCK_rpl_gtid_state, "LOCK_rpl_gtid_state", PSI_FLAG_GLOBAL}, + { &key_LOCK_wait_commit, "wait_for_commit::LOCK_wait_commit", 0}, { &key_LOCK_thd_data, "THD::LOCK_thd_data", 0}, { &key_LOCK_user_conn, "LOCK_user_conn", PSI_FLAG_GLOBAL}, { &key_LOCK_uuid_short_generator, "LOCK_uuid_short_generator", PSI_FLAG_GLOBAL}, @@ -888,7 +889,8 @@ PSI_cond_key key_BINLOG_COND_xid_list, key_BINLOG_update_cond, key_TABLE_SHARE_cond, key_user_level_lock_cond, key_COND_thread_count, key_COND_thread_cache, key_COND_flush_thread_cache, key_BINLOG_COND_queue_busy; -PSI_cond_key key_RELAYLOG_update_cond, key_COND_wakeup_ready; +PSI_cond_key key_RELAYLOG_update_cond, key_COND_wakeup_ready, + key_COND_wait_commit; PSI_cond_key key_RELAYLOG_COND_queue_busy; PSI_cond_key key_TC_LOG_MMAP_COND_queue_busy; 
PSI_cond_key key_COND_rpl_thread, key_COND_rpl_thread_pool; @@ -912,6 +914,7 @@ static PSI_cond_info all_server_conds[]= { &key_RELAYLOG_update_cond, "MYSQL_RELAY_LOG::update_cond", 0}, { &key_RELAYLOG_COND_queue_busy, "MYSQL_RELAY_LOG::COND_queue_busy", 0}, { &key_COND_wakeup_ready, "THD::COND_wakeup_ready", 0}, + { &key_COND_wait_commit, "wait_for_commit::COND_wait_commit", 0}, { &key_COND_cache_status_changed, "Query_cache::COND_cache_status_changed", 0}, { &key_COND_manager, "COND_manager", PSI_FLAG_GLOBAL}, { &key_COND_rpl_status, "COND_rpl_status", PSI_FLAG_GLOBAL}, diff --git a/sql/mysqld.h b/sql/mysqld.h index ff2dfffa991..ed6d05807b0 100644 --- a/sql/mysqld.h +++ b/sql/mysqld.h @@ -253,7 +253,7 @@ extern PSI_mutex_key key_LOCK_slave_state, key_LOCK_binlog_state, extern PSI_mutex_key key_LOCK_stats, key_LOCK_global_user_client_stats, key_LOCK_global_table_stats, - key_LOCK_global_index_stats, key_LOCK_wakeup_ready; + key_LOCK_global_index_stats, key_LOCK_wakeup_ready, key_LOCK_wait_commit; extern PSI_mutex_key key_LOCK_rpl_gtid_state; @@ -279,7 +279,8 @@ extern PSI_cond_key key_BINLOG_COND_xid_list, key_BINLOG_update_cond, key_relay_log_info_sleep_cond, key_TABLE_SHARE_cond, key_user_level_lock_cond, key_COND_thread_count, key_COND_thread_cache, key_COND_flush_thread_cache; -extern PSI_cond_key key_RELAYLOG_update_cond, key_COND_wakeup_ready; +extern PSI_cond_key key_RELAYLOG_update_cond, key_COND_wakeup_ready, + key_COND_wait_commit; extern PSI_cond_key key_RELAYLOG_COND_queue_busy; extern PSI_cond_key key_TC_LOG_MMAP_COND_queue_busy; extern PSI_cond_key key_COND_rpl_thread, key_COND_rpl_thread_pool; diff --git a/sql/sql_class.cc b/sql/sql_class.cc index 8e91c4d7901..fa53b38ab70 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -605,6 +605,17 @@ void thd_set_ha_data(THD *thd, const struct handlerton *hton, } +/** + Allow storage engine to wakeup commits waiting in THD::wait_for_prior_commit. 
+ @see thd_wakeup_subsequent_commits() definition in plugin.h +*/ +extern "C" +void thd_wakeup_subsequent_commits(THD *thd) +{ + thd->wakeup_subsequent_commits(); +} + + extern "C" long long thd_test_options(const THD *thd, long long test_options) { @@ -788,6 +799,7 @@ THD::THD() #if defined(ENABLED_DEBUG_SYNC) debug_sync_control(0), #endif /* defined(ENABLED_DEBUG_SYNC) */ + wait_for_commit_ptr(0), main_warning_info(0, false, false) { ulong tmp; @@ -5580,6 +5592,202 @@ THD::signal_wakeup_ready() } +wait_for_commit::wait_for_commit() + : subsequent_commits_list(0), next_subsequent_commit(0), waitee(0), + opaque_pointer(0), + waiting_for_commit(false), wakeup_subsequent_commits_running(false) +{ + mysql_mutex_init(key_LOCK_wait_commit, &LOCK_wait_commit, MY_MUTEX_INIT_FAST); + mysql_cond_init(key_COND_wait_commit, &COND_wait_commit, 0); +} + + +void +wait_for_commit::wakeup() +{ + /* + We signal each waiter on their own condition and mutex (rather than using + pthread_cond_broadcast() or something like that). + + Otherwise we would need to somehow ensure that they were done + waking up before we could allow this THD to be destroyed, which would + be annoying and unnecessary. + */ + mysql_mutex_lock(&LOCK_wait_commit); + waiting_for_commit= false; + mysql_cond_signal(&COND_wait_commit); + mysql_mutex_unlock(&LOCK_wait_commit); +} + + +/* + Register that the next commit of this THD should wait to complete until + commit in another THD (the waitee) has completed. + + The wait may occur explicitly, with the waiter sitting in + wait_for_prior_commit() until the waitee calls wakeup_subsequent_commits(). + + Alternatively, the TC (eg. binlog) may do the commits of both waitee and + waiter at once during group commit, resolving both of them in the right + order. + + Only one waitee can be registered for a waiter; it must be removed by + wait_for_prior_commit() or unregister_wait_for_prior_commit() before a new + one is registered. 
But it is ok for several waiters to register a wait for + the same waitee. It is also permissible for one THD to be both a waiter and + a waitee at the same time. +*/ +void +wait_for_commit::register_wait_for_prior_commit(wait_for_commit *waitee) +{ + waiting_for_commit= true; + DBUG_ASSERT(!this->waitee /* No prior registration allowed */); + this->waitee= waitee; + + mysql_mutex_lock(&waitee->LOCK_wait_commit); + /* + If waitee is in the middle of wakeup, then there is nothing to wait for, + so we need not register. This is necessary to avoid a race in unregister, + see comments on wakeup_subsequent_commits2() for details. + */ + if (waitee->wakeup_subsequent_commits_running) + waiting_for_commit= false; + else + { + this->next_subsequent_commit= waitee->subsequent_commits_list; + waitee->subsequent_commits_list= this; + } + mysql_mutex_unlock(&waitee->LOCK_wait_commit); +} + + +/* + Wait for commit of another transaction to complete, as already registered + with register_wait_for_prior_commit(). If the commit already completed, + returns immediately. +*/ +void +wait_for_commit::wait_for_prior_commit2() +{ + mysql_mutex_lock(&LOCK_wait_commit); + while (waiting_for_commit) + mysql_cond_wait(&COND_wait_commit, &LOCK_wait_commit); + mysql_mutex_unlock(&LOCK_wait_commit); + waitee= NULL; +} + + +/* + Wakeup anyone waiting for us to have committed. + + Note about locking: + + We have a potential race or deadlock between wakeup_subsequent_commits() in + the waitee and unregister_wait_for_prior_commit() in the waiter. + + Both waiter and waitee needs to take their own lock before it is safe to take + a lock on the other party - else the other party might disappear and invalid + memory data could be accessed. But if we take the two locks in different + order, we may end up in a deadlock. + + The waiter needs to lock the waitee to delete itself from the list in + unregister_wait_for_prior_commit(). 
Thus wakeup_subsequent_commits() can not + hold its own lock while locking waiters, lest we deadlock. + + So we need to prevent unregister_wait_for_prior_commit() running while wakeup + is in progress - otherwise the unregister could complete before the wakeup, + leading to incorrect spurious wakeup or accessing invalid memory. + + However, if we are in the middle of running wakeup_subsequent_commits(), then + there is no need for unregister_wait_for_prior_commit() in the first place - + the waiter can just do a normal wait_for_prior_commit(), as it will be + immediately woken up. + + So the solution to the potential race/deadlock is to set a flag in the waitee + that wakeup_subsequent_commits() is in progress. When this flag is set, + unregister_wait_for_prior_commit() becomes just wait_for_prior_commit(). + + Then also register_wait_for_prior_commit() needs to check if + wakeup_subsequent_commits() is running, and skip the registration if + so. This is needed in case a new waiter manages to register itself and + immediately try to unregister while wakeup_subsequent_commits() is + running. Else the new waiter would also wait rather than unregister, but it + would not be woken up until next wakeup, which could be potentially much + later than necessary. +*/ +void +wait_for_commit::wakeup_subsequent_commits2() +{ + wait_for_commit *waiter; + + mysql_mutex_lock(&LOCK_wait_commit); + wakeup_subsequent_commits_running= true; + waiter= subsequent_commits_list; + subsequent_commits_list= NULL; + mysql_mutex_unlock(&LOCK_wait_commit); + + while (waiter) + { + /* + Important: we must grab the next pointer before waking up the waiter; + once the wakeup is done, the field could be invalidated at any time. 
+ */ + wait_for_commit *next= waiter->next_subsequent_commit; + waiter->wakeup(); + waiter= next; + } + + mysql_mutex_lock(&LOCK_wait_commit); + wakeup_subsequent_commits_running= false; + mysql_mutex_unlock(&LOCK_wait_commit); +} + + +/* Cancel a previously registered wait for another THD to commit before us. */ +void +wait_for_commit::unregister_wait_for_prior_commit2() +{ + mysql_mutex_lock(&LOCK_wait_commit); + if (waiting_for_commit) + { + wait_for_commit *loc_waitee= this->waitee; + wait_for_commit **next_ptr_ptr, *cur; + mysql_mutex_lock(&loc_waitee->LOCK_wait_commit); + if (loc_waitee->wakeup_subsequent_commits_running) + { + /* + When a wakeup is running, we cannot safely remove ourselves from the + list without corrupting it. Instead we can just wait, as wakeup is + already in progress and will thus be immediate. + + See comments on wakeup_subsequent_commits2() for more details. + */ + mysql_mutex_unlock(&loc_waitee->LOCK_wait_commit); + while (waiting_for_commit) + mysql_cond_wait(&COND_wait_commit, &LOCK_wait_commit); + } + else + { + /* Remove ourselves from the list in the waitee. */ + next_ptr_ptr= &loc_waitee->subsequent_commits_list; + while ((cur= *next_ptr_ptr) != NULL) + { + if (cur == this) + { + *next_ptr_ptr= this->next_subsequent_commit; + break; + } + next_ptr_ptr= &cur->next_subsequent_commit; + } + waiting_for_commit= false; + mysql_mutex_unlock(&loc_waitee->LOCK_wait_commit); + } + } + mysql_mutex_unlock(&LOCK_wait_commit); + this->waitee= NULL; +} + + bool Discrete_intervals_list::append(ulonglong start, ulonglong val, ulonglong incr) { diff --git a/sql/sql_class.h b/sql/sql_class.h index bb5b2c4e775..4e1917f62b7 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -1553,6 +1553,115 @@ private: }; +/* + Class to facilitate the commit of one transactions waiting for the commit of + another transaction to complete first. 
+ + This is used during (parallel) replication, to allow different transactions + to be applied in parallel, but still commit in order. + + The transaction that wants to wait for a prior commit must first register + to wait with register_wait_for_prior_commit(waitee). Such registration + must be done holding the waitee->LOCK_wait_commit, to prevent the other + THD from disappearing during the registration. + + Then during commit, if a THD is registered to wait, it will call + wait_for_prior_commit() as part of ha_commit_trans(). If no wait is + registered, or if the waitee for has already completed commit, then + wait_for_prior_commit() returns immediately. + + And when a THD that may be waited for has completed commit (more precisely + commit_ordered()), then it must call wakeup_subsequent_commits() to wake + up any waiters. Note that this must be done at a point that is guaranteed + to be later than any waiters registering themselves. It is safe to call + wakeup_subsequent_commits() multiple times, as waiters are removed from + registration as part of the wakeup. + + The reason for separate register and wait calls is that this allows to + register the wait early, at a point where the waited-for THD is known to + exist. And then the actual wait can be done much later, where the + waited-for THD may have been long gone. By registering early, the waitee + can signal before disappearing. +*/ +struct wait_for_commit +{ + /* + The LOCK_wait_commit protects the fields subsequent_commits_list and + wakeup_subsequent_commits_running (for a waitee), and the flag + waiting_for_commit and associated COND_wait_commit (for a waiter). + */ + mysql_mutex_t LOCK_wait_commit; + mysql_cond_t COND_wait_commit; + /* List of threads that did register_wait_for_prior_commit() on us. */ + wait_for_commit *subsequent_commits_list; + /* Link field for entries in subsequent_commits_list. 
*/ + wait_for_commit *next_subsequent_commit; + /* Our waitee, if we did register_wait_for_prior_commit(), else NULL. */ + wait_for_commit *waitee; + /* + Generic pointer for use by the transaction coordinator to optimise the + waiting for improved group commit. + + Currently used by binlog TC to signal that a waiter is ready to commit, so + that the waitee can grab it and group commit it directly. It is free to be + used by another transaction coordinator for similar purposes. + */ + void *opaque_pointer; + /* + The waiting_for_commit flag is cleared when a waiter has been woken + up. The COND_wait_commit condition is signalled when this has been + cleared. + */ + bool waiting_for_commit; + /* + Flag set when wakeup_subsequent_commits() is active, see comments + on that function for details. + */ + bool wakeup_subsequent_commits_running; + + void register_wait_for_prior_commit(wait_for_commit *waitee); + void wait_for_prior_commit() + { + /* + Quick inline check, to avoid function call and locking in the common case + where no wakeup is registered, or a registered wait was already signalled. + */ + if (waiting_for_commit) + wait_for_prior_commit2(); + } + void wakeup_subsequent_commits() + { + /* + Do the check inline, so only the wakeup case takes the cost of a function + call for every commit. + + Note that the check is done without locking. It is the responsibility of + the user of the wakeup facility to ensure that no waiters can register + themselves after the last call to wakeup_subsequent_commits(). + + This avoids having to take another lock for every commit, which would be + pointless anyway - even if we check under lock, there is nothing to + prevent a waiter from arriving just after releasing the lock.
+ */ + if (subsequent_commits_list) + wakeup_subsequent_commits2(); + } + void unregister_wait_for_prior_commit() + { + if (waiting_for_commit) + unregister_wait_for_prior_commit2(); + } + + void wakeup(); + + void wait_for_prior_commit2(); + void wakeup_subsequent_commits2(); + void unregister_wait_for_prior_commit2(); + + wait_for_commit(); +}; + + extern "C" void my_message_sql(uint error, const char *str, myf MyFlags); class THD; @@ -3194,6 +3303,19 @@ public: void wait_for_wakeup_ready(); /* Wake this thread up from wait_for_wakeup_ready(). */ void signal_wakeup_ready(); + + wait_for_commit *wait_for_commit_ptr; + void wait_for_prior_commit() + { + if (wait_for_commit_ptr) + wait_for_commit_ptr->wait_for_prior_commit(); + } + void wakeup_subsequent_commits() + { + if (wait_for_commit_ptr) + wait_for_commit_ptr->wakeup_subsequent_commits(); + } + private: /** The current internal error handler for this thread, or NULL. */ diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 4b77936550b..4d4bb7bd1f3 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -2924,6 +2924,11 @@ innobase_commit( /* We were instructed to commit the whole transaction, or this is an SQL statement end and autocommit is on */ + /* At this point commit order is fixed and transaction is + visible to others. So we can wakeup other commits waiting for + this one, to allow then to group commit with us. */ + thd_wakeup_subsequent_commits(thd); + /* We did the first part already in innobase_commit_ordered(), Now finish by doing a write + flush of logs. 
*/ trx_commit_complete_for_mysql(trx); diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc index f7507a04412..e80810d3948 100644 --- a/storage/xtradb/handler/ha_innodb.cc +++ b/storage/xtradb/handler/ha_innodb.cc @@ -3585,6 +3585,11 @@ innobase_commit( /* We were instructed to commit the whole transaction, or this is an SQL statement end and autocommit is on */ + /* At this point commit order is fixed and transaction is + visible to others. So we can wakeup other commits waiting for + this one, to allow then to group commit with us. */ + thd_wakeup_subsequent_commits(thd); + /* We did the first part already in innobase_commit_ordered(), Now finish by doing a write + flush of logs. */ trx_commit_complete_for_mysql(trx); From 1b3dc66e3117a09c95a00be2f649b975fdb25e2e Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 28 Jun 2013 15:19:30 +0200 Subject: [PATCH 05/41] MDEV-4506: Parallel replication: Intermediate commit. First step of splitting out part of Relay_log_info, so that different event groups being applied in parallel can each use their own copy. 
--- sql/log_event.cc | 88 +++++++++++++++++++++++++++------------------ sql/log_event.h | 52 +++++++++++++-------------- sql/rpl_gtid.cc | 9 ++--- sql/rpl_parallel.cc | 10 ++++-- sql/rpl_parallel.h | 2 +- sql/rpl_rli.cc | 4 ++- sql/rpl_rli.h | 31 ++++++++++++---- sql/rpl_utility.cc | 5 +-- sql/rpl_utility.h | 2 +- sql/slave.cc | 30 ++++++++++++---- sql/slave.h | 3 +- sql/sql_binlog.cc | 14 +++++++- 12 files changed, 161 insertions(+), 89 deletions(-) diff --git a/sql/log_event.cc b/sql/log_event.cc index 431f8b47f2d..8bbc43dec35 100644 --- a/sql/log_event.cc +++ b/sql/log_event.cc @@ -3755,9 +3755,9 @@ void Query_log_event::print(FILE* file, PRINT_EVENT_INFO* print_event_info) #if defined(HAVE_REPLICATION) && !defined(MYSQL_CLIENT) -int Query_log_event::do_apply_event(Relay_log_info const *rli) +int Query_log_event::do_apply_event(struct rpl_group_info *rgi) { - return do_apply_event(rli, query, q_len); + return do_apply_event(rgi, query, q_len); } /** @@ -3806,7 +3806,7 @@ bool test_if_equal_repl_errors(int expected_error, int actual_error) mismatch. This mismatch could be implemented with a new ER_ code, and to ignore it you would use --slave-skip-errors... */ -int Query_log_event::do_apply_event(Relay_log_info const *rli, +int Query_log_event::do_apply_event(struct rpl_group_info *rgi, const char *query_arg, uint32 q_len_arg) { LEX_STRING new_db; @@ -3814,6 +3814,7 @@ int Query_log_event::do_apply_event(Relay_log_info const *rli, HA_CREATE_INFO db_options; uint64 sub_id= 0; rpl_gtid gtid; + Relay_log_info const *rli= rgi->rli; Rpl_filter *rpl_filter= rli->mi->rpl_filter; DBUG_ENTER("Query_log_event::do_apply_event"); @@ -4006,12 +4007,12 @@ int Query_log_event::do_apply_event(Relay_log_info const *rli, Record any GTID in the same transaction, so slave state is transactionally consistent. 
*/ - if (strcmp("COMMIT", query) == 0 && (sub_id= rli->gtid_sub_id)) + if (strcmp("COMMIT", query) == 0 && (sub_id= rgi->gtid_sub_id)) { /* Clear the GTID from the RLI so we don't accidentally reuse it. */ - const_cast<Relay_log_info*>(rli)->gtid_sub_id= 0; + rgi->gtid_sub_id= 0; - gtid= rli->current_gtid; + gtid= rgi->current_gtid; if (rpl_global_gtid_slave_state.record_gtid(thd, &gtid, sub_id, true, false)) { rli->report(ERROR_LEVEL, ER_CANNOT_UPDATE_GTID_STATE, @@ -4458,10 +4459,12 @@ bool Start_log_event_v3::write(IO_CACHE* file) other words, no deadlock problem. */ -int Start_log_event_v3::do_apply_event(Relay_log_info const *rli) +int Start_log_event_v3::do_apply_event(struct rpl_group_info *rgi) { DBUG_ENTER("Start_log_event_v3::do_apply_event"); int error= 0; + Relay_log_info const *rli= rgi->rli; + switch (binlog_version) { case 3: @@ -4805,9 +4808,10 @@ bool Format_description_log_event::write(IO_CACHE* file) #endif #if defined(HAVE_REPLICATION) && !defined(MYSQL_CLIENT) -int Format_description_log_event::do_apply_event(Relay_log_info const *rli) +int Format_description_log_event::do_apply_event(struct rpl_group_info *rgi) { int ret= 0; + Relay_log_info const *rli= rgi->rli; DBUG_ENTER("Format_description_log_event::do_apply_event"); /* @@ -4848,7 +4852,7 @@ int Format_description_log_event::do_apply_event(Relay_log_info const *rli) 0, then 96, then jump to first really asked event (which is >96). So this is ok.
*/ - ret= Start_log_event_v3::do_apply_event(rli); + ret= Start_log_event_v3::do_apply_event(rgi); } if (!ret) @@ -5509,10 +5513,11 @@ void Load_log_event::set_fields(const char* affected_db, 1 Failure */ -int Load_log_event::do_apply_event(NET* net, Relay_log_info const *rli, +int Load_log_event::do_apply_event(NET* net, struct rpl_group_info *rgi, bool use_rli_only_for_errors) { LEX_STRING new_db; + Relay_log_info const *rli= rgi->rli; Rpl_filter *rpl_filter= rli->mi->rpl_filter; DBUG_ENTER("Load_log_event::do_apply_event"); @@ -5776,7 +5781,7 @@ Error '%s' running LOAD DATA INFILE on table '%s'. Default database: '%s'", DBUG_RETURN(1); } - DBUG_RETURN( use_rli_only_for_errors ? 0 : Log_event::do_apply_event(rli) ); + DBUG_RETURN( use_rli_only_for_errors ? 0 : Log_event::do_apply_event(rgi) ); } #endif @@ -6245,7 +6250,7 @@ Gtid_log_event::pack_info(THD *thd, Protocol *protocol) static char gtid_begin_string[] = "BEGIN"; int -Gtid_log_event::do_apply_event(Relay_log_info const *rli) +Gtid_log_event::do_apply_event(struct rpl_group_info *rgi) { thd->variables.server_id= this->server_id; thd->variables.gtid_domain_id= this->domain_id; @@ -6467,9 +6472,10 @@ Gtid_list_log_event::write(IO_CACHE *file) int -Gtid_list_log_event::do_apply_event(Relay_log_info const *rli) +Gtid_list_log_event::do_apply_event(struct rpl_group_info *rgi) { - int ret= Log_event::do_apply_event(rli); + Relay_log_info const *rli= rgi->rli; + int ret= Log_event::do_apply_event(rgi); if (rli->until_condition == Relay_log_info::UNTIL_GTID && (gl_flags & FLAG_UNTIL_REACHED)) { @@ -6696,13 +6702,14 @@ void Intvar_log_event::print(FILE* file, PRINT_EVENT_INFO* print_event_info) Intvar_log_event::do_apply_event() */ -int Intvar_log_event::do_apply_event(Relay_log_info const *rli) +int Intvar_log_event::do_apply_event(struct rpl_group_info *rgi) { + Relay_log_info *rli= rgi->rli; /* We are now in a statement until the associated query log event has been processed. 
*/ - const_cast<Relay_log_info*>(rli)->set_flag(Relay_log_info::IN_STMT); + rli->set_flag(Relay_log_info::IN_STMT); if (rli->deferred_events_collecting) return rli->deferred_events->add(this); @@ -6805,8 +6812,9 @@ void Rand_log_event::print(FILE* file, PRINT_EVENT_INFO* print_event_info) #if defined(HAVE_REPLICATION) && !defined(MYSQL_CLIENT) -int Rand_log_event::do_apply_event(Relay_log_info const *rli) +int Rand_log_event::do_apply_event(struct rpl_group_info *rgi) { + Relay_log_info const *rli= rgi->rli; /* We are now in a statement until the associated query log event has been processed. @@ -6860,7 +6868,7 @@ bool slave_execute_deferred_events(THD *thd) if (!rli->deferred_events_collecting || rli->deferred_events->is_empty()) return res; - res= rli->deferred_events->execute(rli); + res= rli->deferred_events->execute(rli->group_info); return res; } @@ -6935,23 +6943,24 @@ void Xid_log_event::print(FILE* file, PRINT_EVENT_INFO* print_event_info) #if defined(HAVE_REPLICATION) && !defined(MYSQL_CLIENT) -int Xid_log_event::do_apply_event(Relay_log_info const *rli) +int Xid_log_event::do_apply_event(struct rpl_group_info *rgi) { bool res; int err; rpl_gtid gtid; uint64 sub_id; + Relay_log_info const *rli= rgi->rli; /* Record any GTID in the same transaction, so slave state is transactionally consistent. */ - if ((sub_id= rli->gtid_sub_id)) + if ((sub_id= rgi->gtid_sub_id)) { /* Clear the GTID from the RLI so we don't accidentally reuse it.
*/ - const_cast<Relay_log_info*>(rli)->gtid_sub_id= 0; + rgi->gtid_sub_id= 0; - gtid= rli->current_gtid; + gtid= rgi->current_gtid; err= rpl_global_gtid_slave_state.record_gtid(thd, &gtid, sub_id, true, false); if (err) { @@ -7400,10 +7409,11 @@ void User_var_log_event::print(FILE* file, PRINT_EVENT_INFO* print_event_info) */ #if defined(HAVE_REPLICATION) && !defined(MYSQL_CLIENT) -int User_var_log_event::do_apply_event(Relay_log_info const *rli) +int User_var_log_event::do_apply_event(struct rpl_group_info *rgi) { Item *it= 0; CHARSET_INFO *charset; + Relay_log_info const *rli= rgi->rli; DBUG_ENTER("User_var_log_event::do_apply_event"); if (rli->deferred_events_collecting) @@ -7664,7 +7674,7 @@ Slave_log_event::Slave_log_event(const char* buf, #ifndef MYSQL_CLIENT -int Slave_log_event::do_apply_event(Relay_log_info const *rli) +int Slave_log_event::do_apply_event(struct rpl_group_info *rgi) { if (mysql_bin_log.is_open()) return mysql_bin_log.write(this); @@ -7939,13 +7949,14 @@ void Create_file_log_event::pack_info(THD *thd, Protocol *protocol) */ #if defined(HAVE_REPLICATION) && !defined(MYSQL_CLIENT) -int Create_file_log_event::do_apply_event(Relay_log_info const *rli) +int Create_file_log_event::do_apply_event(struct rpl_group_info *rgi) { char proc_info[17+FN_REFLEN+10], *fname_buf; char *ext; int fd = -1; IO_CACHE file; int error = 1; + Relay_log_info const *rli= rgi->rli; bzero((char*)&file, sizeof(file)); fname_buf= strmov(proc_info, "Making temp file "); @@ -8120,11 +8131,12 @@ int Append_block_log_event::get_create_or_append() const Append_block_log_event::do_apply_event() */ -int Append_block_log_event::do_apply_event(Relay_log_info const *rli) +int Append_block_log_event::do_apply_event(struct rpl_group_info *rgi) { char proc_info[17+FN_REFLEN+10], *fname= proc_info+17; int fd; int error = 1; + Relay_log_info const *rli= rgi->rli; DBUG_ENTER("Append_block_log_event::do_apply_event"); fname= strmov(proc_info, "Making temp file "); @@ -8270,9 +8282,10 @@ void
Delete_file_log_event::pack_info(THD *thd, Protocol *protocol) */ #if defined(HAVE_REPLICATION) && !defined(MYSQL_CLIENT) -int Delete_file_log_event::do_apply_event(Relay_log_info const *rli) +int Delete_file_log_event::do_apply_event(struct rpl_group_info *rgi) { char fname[FN_REFLEN+10]; + Relay_log_info const *rli= rgi->rli; char *ext= slave_load_file_stem(fname, file_id, server_id, ".data", &rli->mi->cmp_connection_name); mysql_file_delete(key_file_log_event_data, fname, MYF(MY_WME)); @@ -8369,7 +8382,7 @@ void Execute_load_log_event::pack_info(THD *thd, Protocol *protocol) Execute_load_log_event::do_apply_event() */ -int Execute_load_log_event::do_apply_event(Relay_log_info const *rli) +int Execute_load_log_event::do_apply_event(struct rpl_group_info *rgi) { char fname[FN_REFLEN+10]; char *ext; @@ -8377,6 +8390,7 @@ int Execute_load_log_event::do_apply_event(Relay_log_info const *rli) int error= 1; IO_CACHE file; Load_log_event *lev= 0; + Relay_log_info const *rli= rgi->rli; ext= slave_load_file_stem(fname, file_id, server_id, ".info", &rli->mi->cmp_connection_name); @@ -8412,7 +8426,7 @@ int Execute_load_log_event::do_apply_event(Relay_log_info const *rli) */ const_cast<Relay_log_info*>(rli)->future_group_master_log_pos= log_pos; - if (lev->do_apply_event(0,rli,1)) + if (lev->do_apply_event(0,rgi,1)) { /* We want to indicate the name of the file that could not be loaded @@ -8641,13 +8655,14 @@ void Execute_load_query_log_event::pack_info(THD *thd, Protocol *protocol) int -Execute_load_query_log_event::do_apply_event(Relay_log_info const *rli) +Execute_load_query_log_event::do_apply_event(struct rpl_group_info *rgi) { char *p; char *buf; char *fname; char *fname_end; int error; + Relay_log_info const *rli= rgi->rli; buf= (char*) my_malloc(q_len + 1 - (fn_pos_end - fn_pos_start) + (FN_REFLEN + 10) + 10 + 8 + 5, MYF(MY_WME)); @@ -8684,7 +8699,7 @@ Execute_load_query_log_event::do_apply_event(Relay_log_info const *rli) p= strmake(p, STRING_WITH_LEN(" INTO ")); p= strmake(p,
query+fn_pos_end, q_len-fn_pos_end); - error= Query_log_event::do_apply_event(rli, buf, p-buf); + error= Query_log_event::do_apply_event(rgi, buf, p-buf); /* Forging file name for deletion in same buffer */ *fname_end= 0; @@ -9048,8 +9063,9 @@ int Rows_log_event::do_add_row_data(uchar *row_data, size_t length) #endif #if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) -int Rows_log_event::do_apply_event(Relay_log_info const *rli) +int Rows_log_event::do_apply_event(struct rpl_group_info *rgi) { + Relay_log_info const *rli= rgi->rli; DBUG_ENTER("Rows_log_event::do_apply_event(Relay_log_info*)"); int error= 0; /* @@ -9751,7 +9767,7 @@ void Annotate_rows_log_event::print(FILE *file, PRINT_EVENT_INFO *pinfo) #endif #if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) -int Annotate_rows_log_event::do_apply_event(Relay_log_info const *rli) +int Annotate_rows_log_event::do_apply_event(struct rpl_group_info *rgi) { m_save_thd_query_txt= thd->query(); m_save_thd_query_len= thd->query_length(); @@ -10269,13 +10285,14 @@ check_table_map(Relay_log_info const *rli, RPL_TABLE_LIST *table_list) DBUG_RETURN(res); } -int Table_map_log_event::do_apply_event(Relay_log_info const *rli) +int Table_map_log_event::do_apply_event(struct rpl_group_info *rgi) { RPL_TABLE_LIST *table_list; char *db_mem, *tname_mem; size_t dummy_len; void *memory; Rpl_filter *filter; + Relay_log_info const *rli= rgi->rli; DBUG_ENTER("Table_map_log_event::do_apply_event(Relay_log_info*)"); DBUG_ASSERT(rli->sql_thd == thd); @@ -11818,8 +11835,9 @@ Incident_log_event::print(FILE *file, #if defined(HAVE_REPLICATION) && !defined(MYSQL_CLIENT) int -Incident_log_event::do_apply_event(Relay_log_info const *rli) +Incident_log_event::do_apply_event(struct rpl_group_info *rgi) { + Relay_log_info const *rli= rgi->rli; DBUG_ENTER("Incident_log_event::do_apply_event"); rli->report(ERROR_LEVEL, ER_SLAVE_INCIDENT, ER(ER_SLAVE_INCIDENT), diff --git a/sql/log_event.h b/sql/log_event.h index 641ab3e37b7..8a60296695b 
100644 --- a/sql/log_event.h +++ b/sql/log_event.h @@ -1317,9 +1317,9 @@ public: @see do_apply_event */ - int apply_event(Relay_log_info const *rli) + int apply_event(struct rpl_group_info *rgi) { - return do_apply_event(rli); + return do_apply_event(rgi); } @@ -1412,7 +1412,7 @@ protected: @retval 0 Event applied successfully @retval errno Error code if event application failed */ - virtual int do_apply_event(Relay_log_info const *rli) + virtual int do_apply_event(struct rpl_group_info *rgi) { return 0; /* Default implementation does nothing */ } @@ -1966,10 +1966,10 @@ public: public: /* !!! Public in this patch to allow old usage */ #if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) virtual enum_skip_reason do_shall_skip(Relay_log_info *rli); - virtual int do_apply_event(Relay_log_info const *rli); + virtual int do_apply_event(struct rpl_group_info *rgi); virtual int do_update_pos(Relay_log_info *rli); - int do_apply_event(Relay_log_info const *rli, + int do_apply_event(struct rpl_group_info *rgi, const char *query_arg, uint32 q_len_arg); static bool peek_is_commit_rollback(const char *event_start, @@ -2083,7 +2083,7 @@ public: private: #if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) - virtual int do_apply_event(Relay_log_info const* rli); + virtual int do_apply_event(struct rpl_group_info *rgi); #endif }; @@ -2396,12 +2396,12 @@ public: public: /* !!! 
Public in this patch to allow old usage */ #if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) - virtual int do_apply_event(Relay_log_info const* rli) + virtual int do_apply_event(struct rpl_group_info *rgi) { - return do_apply_event(thd->slave_net,rli,0); + return do_apply_event(thd->slave_net,rgi,0); } - int do_apply_event(NET *net, Relay_log_info const *rli, + int do_apply_event(NET *net, struct rpl_group_info *rgi, bool use_rli_only_for_errors); #endif }; @@ -2480,7 +2480,7 @@ public: protected: #if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) - virtual int do_apply_event(Relay_log_info const *rli); + virtual int do_apply_event(struct rpl_group_info *rgi); virtual enum_skip_reason do_shall_skip(Relay_log_info*) { /* @@ -2576,7 +2576,7 @@ public: static bool is_version_before_checksum(const master_version_split *version_split); protected: #if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) - virtual int do_apply_event(Relay_log_info const *rli); + virtual int do_apply_event(struct rpl_group_info *rgi); virtual int do_update_pos(Relay_log_info *rli); virtual enum_skip_reason do_shall_skip(Relay_log_info *rli); #endif @@ -2655,7 +2655,7 @@ Intvar_log_event(THD* thd_arg,uchar type_arg, ulonglong val_arg, private: #if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) - virtual int do_apply_event(Relay_log_info const *rli); + virtual int do_apply_event(struct rpl_group_info *rgi); virtual int do_update_pos(Relay_log_info *rli); virtual enum_skip_reason do_shall_skip(Relay_log_info *rli); #endif @@ -2734,7 +2734,7 @@ class Rand_log_event: public Log_event private: #if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) - virtual int do_apply_event(Relay_log_info const *rli); + virtual int do_apply_event(struct rpl_group_info *rgi); virtual int do_update_pos(Relay_log_info *rli); virtual enum_skip_reason do_shall_skip(Relay_log_info *rli); #endif @@ -2783,7 +2783,7 @@ class Xid_log_event: public Log_event private: #if defined(MYSQL_SERVER) && 
defined(HAVE_REPLICATION) - virtual int do_apply_event(Relay_log_info const *rli); + virtual int do_apply_event(struct rpl_group_info *rgi); enum_skip_reason do_shall_skip(Relay_log_info *rli); #endif }; @@ -2850,7 +2850,7 @@ public: private: #if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) - virtual int do_apply_event(Relay_log_info const *rli); + virtual int do_apply_event(struct rpl_group_info *rgi); virtual int do_update_pos(Relay_log_info *rli); virtual enum_skip_reason do_shall_skip(Relay_log_info *rli); #endif @@ -3099,7 +3099,7 @@ public: uint16 flags, bool is_transactional, uint64 commit_id); #ifdef HAVE_REPLICATION void pack_info(THD *thd, Protocol *protocol); - virtual int do_apply_event(Relay_log_info const *rli); + virtual int do_apply_event(struct rpl_group_info *rgi); virtual int do_update_pos(Relay_log_info *rli); virtual enum_skip_reason do_shall_skip(Relay_log_info *rli); #endif @@ -3229,7 +3229,7 @@ public: #if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) bool to_packet(String *packet); bool write(IO_CACHE *file); - virtual int do_apply_event(Relay_log_info const *rli); + virtual int do_apply_event(struct rpl_group_info *rgi); #endif static bool peek(const char *event_start, uint32 event_len, uint8 checksum_alg, @@ -3308,7 +3308,7 @@ public: private: #if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) - virtual int do_apply_event(Relay_log_info const *rli); + virtual int do_apply_event(struct rpl_group_info *rgi); #endif }; @@ -3363,7 +3363,7 @@ public: private: #if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) - virtual int do_apply_event(Relay_log_info const *rli); + virtual int do_apply_event(struct rpl_group_info *rgi); #endif }; @@ -3404,7 +3404,7 @@ public: private: #if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) - virtual int do_apply_event(Relay_log_info const *rli); + virtual int do_apply_event(struct rpl_group_info *rgi); #endif }; @@ -3444,7 +3444,7 @@ public: private: #if defined(MYSQL_SERVER) && 
defined(HAVE_REPLICATION) - virtual int do_apply_event(Relay_log_info const *rli); + virtual int do_apply_event(struct rpl_group_info *rgi); #endif }; @@ -3543,7 +3543,7 @@ public: private: #if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) - virtual int do_apply_event(Relay_log_info const *rli); + virtual int do_apply_event(struct rpl_group_info *rgi); #endif }; @@ -3615,7 +3615,7 @@ public: #if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) private: - virtual int do_apply_event(Relay_log_info const*); + virtual int do_apply_event(struct rpl_group_info *rgi); virtual int do_update_pos(Relay_log_info*); virtual enum_skip_reason do_shall_skip(Relay_log_info*); #endif @@ -4030,7 +4030,7 @@ public: private: #if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) - virtual int do_apply_event(Relay_log_info const *rli); + virtual int do_apply_event(struct rpl_group_info *rgi); virtual int do_update_pos(Relay_log_info *rli); virtual enum_skip_reason do_shall_skip(Relay_log_info *rli); #endif @@ -4258,7 +4258,7 @@ protected: private: #if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) - virtual int do_apply_event(Relay_log_info const *rli); + virtual int do_apply_event(struct rpl_group_info *rgi); virtual int do_update_pos(Relay_log_info *rli); virtual enum_skip_reason do_shall_skip(Relay_log_info *rli); @@ -4592,7 +4592,7 @@ public: #endif #if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) - virtual int do_apply_event(Relay_log_info const *rli); + virtual int do_apply_event(struct rpl_group_info *rgi); #endif virtual bool write_data_header(IO_CACHE *file); diff --git a/sql/rpl_gtid.cc b/sql/rpl_gtid.cc index 71b18e64842..54d3b704a2c 100644 --- a/sql/rpl_gtid.cc +++ b/sql/rpl_gtid.cc @@ -65,17 +65,18 @@ int rpl_slave_state::record_and_update_gtid(THD *thd, Relay_log_info *rli) { uint64 sub_id; + struct rpl_group_info *rgi; /* Update the GTID position, if we have it and did not already update it in a GTID transaction. 
*/ - if ((sub_id= rli->gtid_sub_id)) + if ((rgi= rli->group_info) && (sub_id= rgi->gtid_sub_id)) { - rli->gtid_sub_id= 0; - if (record_gtid(thd, &rli->current_gtid, sub_id, false, false)) + rgi->gtid_sub_id= 0; + if (record_gtid(thd, &rgi->current_gtid, sub_id, false, false)) return 1; - update_state_hash(sub_id, &rli->current_gtid); + update_state_hash(sub_id, &rgi->current_gtid); } return 0; } diff --git a/sql/rpl_parallel.cc b/sql/rpl_parallel.cc index f1ac7e83071..8ea4799e94a 100644 --- a/sql/rpl_parallel.cc +++ b/sql/rpl_parallel.cc @@ -13,15 +13,18 @@ rpt_handle_event(rpl_parallel_thread::queued_event *qev, struct rpl_parallel_thread *rpt) { int err; - Relay_log_info *rli= qev->rli; + struct rpl_group_info *rgi= qev->rgi; + Relay_log_info *rli= rgi->rli; thd->rli_slave= rli; thd->rpl_filter = rli->mi->rpl_filter; /* ToDo: Access to thd, and what about rli, split out a parallel part? */ mysql_mutex_lock(&rli->data_lock); - err= apply_event_and_update_pos(qev->ev, thd, rli, rpt); + err= apply_event_and_update_pos(qev->ev, thd, rgi, rpt); /* ToDo: error handling. */ /* ToDo: also free qev->ev, or hold on to it for a bit if necessary. */ + my_free(rgi); + rgi= NULL; } @@ -398,7 +401,8 @@ rpl_parallel::do_event(Relay_log_info *rli, Log_event *ev, THD *parent_thd) return true; } qev->ev= ev; - qev->rli= rli; + qev->rgi= rli->group_info; + rli->group_info= NULL; /* Avoid conflict with groups applied in parallel */ qev->next= NULL; if (ev->get_type_code() == GTID_EVENT) diff --git a/sql/rpl_parallel.h b/sql/rpl_parallel.h index 7e966f1615c..c5bb39cb6fc 100644 --- a/sql/rpl_parallel.h +++ b/sql/rpl_parallel.h @@ -23,7 +23,7 @@ struct rpl_parallel_thread { struct queued_event { queued_event *next; Log_event *ev; - Relay_log_info *rli; + struct rpl_group_info *rgi; } *event_queue, *last_in_queue; rpl_parallel_thread *wait_for; /* ToDo: change this ... 
*/ }; diff --git a/sql/rpl_rli.cc b/sql/rpl_rli.cc index 12c38f95575..5d5bca1189c 100644 --- a/sql/rpl_rli.cc +++ b/sql/rpl_rli.cc @@ -59,7 +59,7 @@ Relay_log_info::Relay_log_info(bool is_slave_recovery) abort_pos_wait(0), slave_run_id(0), sql_thd(0), inited(0), abort_slave(0), slave_running(0), until_condition(UNTIL_NONE), until_log_pos(0), retried_trans(0), executed_entries(0), - gtid_sub_id(0), tables_to_lock(0), tables_to_lock_count(0), + group_info(0), tables_to_lock(0), tables_to_lock_count(0), last_event_start_time(0), deferred_events(NULL),m_flags(0), row_stmt_start_timestamp(0), long_find_row_note_printed(false), m_annotate_event(0) @@ -113,6 +113,8 @@ Relay_log_info::~Relay_log_info() mysql_cond_destroy(&sleep_cond); relay_log.cleanup(); free_annotate_event(); + if (group_info) + my_free(group_info); DBUG_VOID_RETURN; } diff --git a/sql/rpl_rli.h b/sql/rpl_rli.h index 452457e9e5a..c02ae6e3adb 100644 --- a/sql/rpl_rli.h +++ b/sql/rpl_rli.h @@ -53,6 +53,8 @@ class Master_info; *****************************************************************************/ +struct rpl_group_info; + class Relay_log_info : public Slave_reporting_capability { public: @@ -312,13 +314,8 @@ public: char slave_patternload_file[FN_REFLEN]; size_t slave_patternload_file_size; - /* - Current GTID being processed. - The sub_id gives the binlog order within one domain_id. A zero sub_id - means that there is no active GTID. - */ - uint64 gtid_sub_id; - rpl_gtid current_gtid; + /* Various data related to the currently executing event group. */ + struct rpl_group_info *group_info; rpl_parallel parallel; Relay_log_info(bool is_slave_recovery); @@ -596,6 +593,26 @@ private: }; +/* + This is data for various state needed to be kept for the processing of + one event group in the SQL thread. + + For single-threaded replication it is linked from the RLI, for parallel + replication it is linked into each event group being executed in parallel. 
+*/ +struct rpl_group_info +{ + Relay_log_info *rli; + /* + Current GTID being processed. + The sub_id gives the binlog order within one domain_id. A zero sub_id + means that there is no active GTID. + */ + uint64 gtid_sub_id; + rpl_gtid current_gtid; +}; + + // Defined in rpl_rli.cc int init_relay_log_info(Relay_log_info* rli, const char* info_fname); diff --git a/sql/rpl_utility.cc b/sql/rpl_utility.cc index 6bbe998a624..cce8ef99fef 100644 --- a/sql/rpl_utility.cc +++ b/sql/rpl_utility.cc @@ -1143,9 +1143,10 @@ bool Deferred_log_events::is_empty() return array.elements == 0; } -bool Deferred_log_events::execute(Relay_log_info *rli) +bool Deferred_log_events::execute(struct rpl_group_info *rgi) { bool res= false; + Relay_log_info *rli= rgi->rli; DBUG_ASSERT(rli->deferred_events_collecting); @@ -1154,7 +1155,7 @@ bool Deferred_log_events::execute(Relay_log_info *rli) { Log_event *ev= (* (Log_event **) dynamic_array_ptr(&array, i)); - res= ev->apply_event(rli); + res= ev->apply_event(rgi); } rli->deferred_events_collecting= true; return res; diff --git a/sql/rpl_utility.h b/sql/rpl_utility.h index 79f4517c492..893cc8d3e04 100644 --- a/sql/rpl_utility.h +++ b/sql/rpl_utility.h @@ -275,7 +275,7 @@ public: /* queue for exection at Query-log-event time prior the Query */ int add(Log_event *ev); bool is_empty(); - bool execute(Relay_log_info *rli); + bool execute(struct rpl_group_info *rgi); void rewind(); bool is_last(Log_event *ev) { return ev == last_added; }; }; diff --git a/sql/slave.cc b/sql/slave.cc index d7e4d9a25ed..ace5c7f837b 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -3018,10 +3018,12 @@ static int has_temporary_error(THD *thd) @retval 2 No error calling ev->apply_event(), but error calling ev->update_pos(). 
*/ -int apply_event_and_update_pos(Log_event* ev, THD* thd, Relay_log_info* rli, +int apply_event_and_update_pos(Log_event* ev, THD* thd, + struct rpl_group_info *rgi, rpl_parallel_thread *rpt) { int exec_res= 0; + Relay_log_info* rli= rgi->rli; DBUG_ENTER("apply_event_and_update_pos"); @@ -3080,7 +3082,7 @@ int apply_event_and_update_pos(Log_event* ev, THD* thd, Relay_log_info* rli, } mysql_mutex_unlock(&rli->data_lock); if (reason == Log_event::EVENT_SKIP_NOT) - exec_res= ev->apply_event(rli); + exec_res= ev->apply_event(rgi); #ifndef DBUG_OFF /* @@ -3244,7 +3246,7 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli) if (opt_slave_parallel_threads > 0) DBUG_RETURN(rli->parallel.do_event(rli, ev, thd)); - exec_res= apply_event_and_update_pos(ev, thd, rli, NULL); + exec_res= apply_event_and_update_pos(ev, thd, rli->group_info, NULL); switch (ev->get_type_code()) { case FORMAT_DESCRIPTION_EVENT: @@ -5734,6 +5736,7 @@ static Log_event* next_event(Relay_log_info* rli) mysql_mutex_t *log_lock = rli->relay_log.get_log_lock(); const char* errmsg=0; THD* thd = rli->sql_thd; + struct rpl_group_info *rgi; DBUG_ENTER("next_event"); DBUG_ASSERT(thd != 0); @@ -5821,6 +5824,19 @@ static Log_event* next_event(Relay_log_info* rli) opt_slave_sql_verify_checksum))) { + if (!(rgi= rli->group_info)) + { + if (!(rgi= rli->group_info= (struct rpl_group_info *) + my_malloc(sizeof(*rgi), MYF(0)))) + { + errmsg = "slave SQL thread aborted because of out-of-memory error"; + if (hot_log) + mysql_mutex_unlock(log_lock); + goto err; + } + bzero(rgi, sizeof(*rgi)); + } + rgi->rli= rli; DBUG_ASSERT(thd==rli->sql_thd); /* read it while we have a lock, to avoid a mutex lock in @@ -5842,10 +5858,10 @@ static Log_event* next_event(Relay_log_info* rli) mysql_mutex_unlock(log_lock); goto err; } - rli->gtid_sub_id= sub_id; - rli->current_gtid.server_id= gev->server_id; - rli->current_gtid.domain_id= gev->domain_id; - rli->current_gtid.seq_no= gev->seq_no; + rgi->gtid_sub_id= sub_id; + 
rgi->current_gtid.server_id= gev->server_id; + rgi->current_gtid.domain_id= gev->domain_id; + rgi->current_gtid.seq_no= gev->seq_no; } if (hot_log) diff --git a/sql/slave.h b/sql/slave.h index 69b0e011a39..4e64754a877 100644 --- a/sql/slave.h +++ b/sql/slave.h @@ -228,7 +228,8 @@ int purge_relay_logs(Relay_log_info* rli, THD *thd, bool just_reset, void set_slave_thread_options(THD* thd); void set_slave_thread_default_charset(THD *thd, Relay_log_info const *rli); int rotate_relay_log(Master_info* mi); -int apply_event_and_update_pos(Log_event* ev, THD* thd, Relay_log_info* rli, +int apply_event_and_update_pos(Log_event* ev, THD* thd, + struct rpl_group_info *rgi, rpl_parallel_thread *rpt); pthread_handler_t handle_slave_io(void *arg); diff --git a/sql/sql_binlog.cc b/sql/sql_binlog.cc index 3bb5deab406..bef9a4c3475 100644 --- a/sql/sql_binlog.cc +++ b/sql/sql_binlog.cc @@ -44,6 +44,7 @@ void mysql_client_binlog_statement(THD* thd) { + struct rpl_group_info *rgi; DBUG_ENTER("mysql_client_binlog_statement"); DBUG_PRINT("info",("binlog base64: '%*s'", (int) (thd->lex->comment.length < 2048 ? @@ -196,6 +197,17 @@ void mysql_client_binlog_statement(THD* thd) } } + if (!(rgi= rli->group_info)) + { + if (!(rgi= rli->group_info= (struct rpl_group_info *) + my_malloc(sizeof(*rgi), MYF(0)))) + { + my_error(ER_OUTOFMEMORY, MYF(0), sizeof(*rgi)); + goto end; + } + bzero(rgi, sizeof(*rgi)); + } + rgi->rli= rli; ev= Log_event::read_log_event(bufptr, event_len, &error, rli->relay_log.description_event_for_exec, 0); @@ -232,7 +244,7 @@ void mysql_client_binlog_statement(THD* thd) (ev->flags & LOG_EVENT_SKIP_REPLICATION_F ? OPTION_SKIP_REPLICATION : 0); - err= ev->apply_event(rli); + err= ev->apply_event(rgi); thd->variables.option_bits= (thd->variables.option_bits & ~OPTION_SKIP_REPLICATION) | From 31a5edb5c27d2ecae8d19345e1a373d22246143a Mon Sep 17 00:00:00 2001 From: unknown Date: Wed, 3 Jul 2013 13:46:33 +0200 Subject: [PATCH 06/41] MDEV-4506: Parallel replication. 
Intermediate commit. Hook in the wait-for-prior-commit logic (not really tested yet). Clean up some resource maintenance around rpl_group_info (may still be some smaller issues there though). Add a ToDo list at the top of rpl_parallel.cc --- sql/log_event.h | 20 +++++ sql/mysqld.cc | 5 +- sql/mysqld.h | 2 +- sql/rpl_parallel.cc | 179 ++++++++++++++++++++++++++++++++++---------- sql/rpl_parallel.h | 11 ++- sql/rpl_rli.cc | 26 ++++++- sql/rpl_rli.h | 27 ++++++- sql/slave.cc | 70 ++++++++--------- sql/sql_class.cc | 7 ++ sql/sql_class.h | 1 + 10 files changed, 259 insertions(+), 89 deletions(-) diff --git a/sql/log_event.h b/sql/log_event.h index 8a60296695b..491666e2fdb 100644 --- a/sql/log_event.h +++ b/sql/log_event.h @@ -1376,6 +1376,26 @@ public: } } + static bool is_group_event(enum Log_event_type ev_type) + { + switch (ev_type) + { + case START_EVENT_V3: + case STOP_EVENT: + case ROTATE_EVENT: + case SLAVE_EVENT: + case FORMAT_DESCRIPTION_EVENT: + case INCIDENT_EVENT: + case HEARTBEAT_LOG_EVENT: + case BINLOG_CHECKPOINT_EVENT: + case GTID_LIST_EVENT: + return false; + + default: + return true; + } + } + protected: /** diff --git a/sql/mysqld.cc b/sql/mysqld.cc index bbb7c0d67bf..52c754993ac 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -772,7 +772,7 @@ PSI_mutex_key key_BINLOG_LOCK_index, key_BINLOG_LOCK_xid_list, key_PARTITION_LOCK_auto_inc; PSI_mutex_key key_RELAYLOG_LOCK_index; PSI_mutex_key key_LOCK_slave_state, key_LOCK_binlog_state, - key_LOCK_rpl_thread, key_LOCK_rpl_thread_pool; + key_LOCK_rpl_thread, key_LOCK_rpl_thread_pool, key_LOCK_parallel_entry; PSI_mutex_key key_LOCK_stats, key_LOCK_global_user_client_stats, key_LOCK_global_table_stats, @@ -850,7 +850,8 @@ static PSI_mutex_info all_server_mutexes[]= { &key_LOCK_slave_state, "LOCK_slave_state", 0}, { &key_LOCK_binlog_state, "LOCK_binlog_state", 0}, { &key_LOCK_rpl_thread, "LOCK_rpl_thread", 0}, - { &key_LOCK_rpl_thread_pool, "LOCK_rpl_thread_pool", 0} + { &key_LOCK_rpl_thread_pool, 
"LOCK_rpl_thread_pool", 0}, + { &key_LOCK_parallel_entry, "LOCK_parallel_entry", 0} }; PSI_rwlock_key key_rwlock_LOCK_grant, key_rwlock_LOCK_logger, diff --git a/sql/mysqld.h b/sql/mysqld.h index ed6d05807b0..d3b17cfefe1 100644 --- a/sql/mysqld.h +++ b/sql/mysqld.h @@ -249,7 +249,7 @@ extern PSI_mutex_key key_BINLOG_LOCK_index, key_BINLOG_LOCK_xid_list, key_LOCK_error_messages, key_LOCK_thread_count, key_PARTITION_LOCK_auto_inc; extern PSI_mutex_key key_RELAYLOG_LOCK_index; extern PSI_mutex_key key_LOCK_slave_state, key_LOCK_binlog_state, - key_LOCK_rpl_thread, key_LOCK_rpl_thread_pool; + key_LOCK_rpl_thread, key_LOCK_rpl_thread_pool, key_LOCK_parallel_entry; extern PSI_mutex_key key_LOCK_stats, key_LOCK_global_user_client_stats, key_LOCK_global_table_stats, diff --git a/sql/rpl_parallel.cc b/sql/rpl_parallel.cc index 8ea4799e94a..1a6eb9e3d50 100644 --- a/sql/rpl_parallel.cc +++ b/sql/rpl_parallel.cc @@ -4,6 +4,51 @@ #include "rpl_mi.h" +/* + Code for optional parallel execution of replicated events on the slave. + + ToDo list: + + - Review every field in Relay_log_info, and all code that accesses it. + Split out the necessary parts into rpl_group_info, to avoid conflicts + between parallel execution of events. (Such as deferred events ...) + + - Error handling. If we fail in one of multiple parallel executions, we + need to make a best effort to complete prior transactions and roll back + following transactions, so slave binlog position will be correct. + + - Stopping the slave needs to handle stopping all parallel executions. And + the logic in sql_slave_killed() that waits for current event group to + complete needs to be extended appropriately... + + - We need some user-configurable limit on how far ahead the SQL thread will + fetch and queue events for parallel execution (otherwise if slave gets + behind we will fill up memory with pending malloc()'ed events). + + - Fix update of relay-log.info and master.info. 
In non-GTID replication, + they must be serialised to preserve correctness. In GTID replication, we + should not update them at all except at slave thread stop. + + - All the waits (eg. in struct wait_for_commit and in + rpl_parallel_thread_pool::get_thread()) need to be killable. And on kill, + everything needs to be correctly rolled back and stopped in all threads, + to ensure a consistent slave replication state. + + - We need some knob on the master to allow the user to deliberately delay + commits waiting for more transactions to join group commit, to increase + potential for parallel execution on the slave. + + - Handle the case of a partial event group. This occurs when the master + crashes in the middle of writing the event group to the binlog. The + slave rolls back the transaction; parallel execution needs to be able + to deal with this wrt. commit_orderer and such. + + - We should fail if we connect to the master with opt_slave_parallel_threads + greater than zero and master does not support GTID. Just to avoid a bunch + of potential problems, we won't be able to do any parallel replication + in this case anyway. +*/ + struct rpl_parallel_thread_pool global_rpl_thread_pool; @@ -18,13 +63,14 @@ rpt_handle_event(rpl_parallel_thread::queued_event *qev, thd->rli_slave= rli; thd->rpl_filter = rli->mi->rpl_filter; + /* ToDo: Get rid of rli->group_info, it is not thread safe. */ + rli->group_info= rgi; + /* ToDo: Access to thd, and what about rli, split out a parallel part? */ mysql_mutex_lock(&rli->data_lock); err= apply_event_and_update_pos(qev->ev, thd, rgi, rpt); /* ToDo: error handling. */ /* ToDo: also free qev->ev, or hold on to it for a bit if necessary. 
*/ - my_free(rgi); - rgi= NULL; } @@ -90,31 +136,72 @@ handle_rpl_parallel_thread(void *arg) { struct rpl_parallel_thread::queued_event *next= events->next; Log_event_type event_type= events->ev->get_type_code(); + rpl_group_info *rgi= events->rgi; + rpl_parallel_entry *entry= rgi->parallel_entry; + uint64 wait_for_sub_id; + if (event_type == GTID_EVENT) { + in_event_group= true; group_standalone= (0 != (static_cast<Gtid_log_event *>(events->ev)->flags2 & Gtid_log_event::FL_STANDALONE)); - in_event_group= true; - } - else - { - if (group_standalone) + + /* + Register ourself to wait for the previous commit, if we need to do + such registration _and_ that previous commit has not already + occured. + */ + if ((wait_for_sub_id= rgi->wait_commit_sub_id)) { - if (!Log_event::is_part_of_group(event_type)) - in_event_group= false; - } - else if (event_type == XID_EVENT) - in_event_group= false; - else if (event_type == QUERY_EVENT) - { - Query_log_event *query= static_cast<Query_log_event *>(events->ev); - if (!strcmp("COMMIT", query->query) || - !strcmp("ROLLBACK", query->query)) - in_event_group= false; + mysql_mutex_lock(&entry->LOCK_parallel_entry); + if (wait_for_sub_id > entry->last_committed_sub_id) + { + wait_for_commit *waitee= + &rgi->wait_commit_group_info->commit_orderer; + rgi->commit_orderer.register_wait_for_prior_commit(waitee); + } + mysql_mutex_unlock(&entry->LOCK_parallel_entry); } + + DBUG_ASSERT(!thd->wait_for_commit_ptr); + thd->wait_for_commit_ptr= &rgi->commit_orderer; } + rpt_handle_event(events, thd, rpt); + + if (in_event_group) + { + if ((group_standalone && !Log_event::is_part_of_group(event_type)) || + event_type == XID_EVENT || + (event_type == QUERY_EVENT && + (!strcmp("COMMIT", ((Query_log_event *)events->ev)->query) || + !strcmp("ROLLBACK", ((Query_log_event *)events->ev)->query)))) + { + in_event_group= false; + + rgi->commit_orderer.unregister_wait_for_prior_commit(); + thd->wait_for_commit_ptr= NULL; + + /* + Record that we have finished, so other event groups will no +
longer attempt to wait for us to commit. + + We can race here with the next transactions, but that is fine, as + long as we check that we do not decrease last_committed_sub_id. If + this commit is done, then any prior commits will also have been + done and also no longer need waiting for. + */ + mysql_mutex_lock(&entry->LOCK_parallel_entry); + if (entry->last_committed_sub_id < rgi->gtid_sub_id) + entry->last_committed_sub_id= rgi->gtid_sub_id; + mysql_mutex_unlock(&entry->LOCK_parallel_entry); + + rgi->commit_orderer.wakeup_subsequent_commits(); + delete rgi; + } + } + my_free(events); events= next; } @@ -365,19 +452,17 @@ rpl_parallel::find(uint32 domain_id) (const uchar *)&domain_id, 0))) { /* Allocate a new, empty one. */ - if (!(e= (struct rpl_parallel_entry *)my_malloc(sizeof(*e), MYF(0)))) + if (!(e= (struct rpl_parallel_entry *)my_malloc(sizeof(*e), + MYF(MY_ZEROFILL)))) return NULL; e->domain_id= domain_id; - e->last_server_id= 0; - e->last_seq_no= 0; - e->last_commit_id= 0; - e->active= false; - e->rpl_thread= NULL; if (my_hash_insert(&domain_hash, (uchar *)e)) { my_free(e); return NULL; } + mysql_mutex_init(key_LOCK_parallel_entry, &e->LOCK_parallel_entry, + MY_MUTEX_INIT_FAST); } return e; @@ -385,11 +470,15 @@ rpl_parallel::find(uint32 domain_id) bool -rpl_parallel::do_event(Relay_log_info *rli, Log_event *ev, THD *parent_thd) +rpl_parallel::do_event(struct rpl_group_info *serial_rgi, Log_event *ev, + THD *parent_thd) { rpl_parallel_entry *e; rpl_parallel_thread *cur_thread; rpl_parallel_thread::queued_event *qev; + struct rpl_group_info *rgi; + Relay_log_info *rli= serial_rgi->rli; + enum Log_event_type typ; /* ToDo: what to do with this lock?!? 
*/ mysql_mutex_unlock(&rli->data_lock); @@ -401,17 +490,17 @@ rpl_parallel::do_event(Relay_log_info *rli, Log_event *ev, THD *parent_thd) return true; } qev->ev= ev; - qev->rgi= rli->group_info; - rli->group_info= NULL; /* Avoid conflict with groups applied in parallel */ qev->next= NULL; - if (ev->get_type_code() == GTID_EVENT) + if ((typ= ev->get_type_code()) == GTID_EVENT) { Gtid_log_event *gtid_ev= static_cast<Gtid_log_event *>(ev); - if (!(e= find(gtid_ev->domain_id))) + if (!(e= find(gtid_ev->domain_id)) || + !(e->current_group_info= rgi= new rpl_group_info(rli)) || + event_group_new_gtid(rgi, gtid_ev)) { - my_error(ER_OUT_OF_RESOURCES, MYF(0)); + my_error(ER_OUT_OF_RESOURCES, MYF(MY_WME)); return true; } @@ -448,7 +537,7 @@ rpl_parallel::do_event(Relay_log_info *rli, Log_event *ev, THD *parent_thd) e->last_commit_id= 0; } cur_thread= e->rpl_thread= global_rpl_thread_pool.get_thread(e); - e->rpl_thread->wait_for= NULL; /* ToDo */ + rgi->wait_commit_sub_id= 0; /* get_thread() returns with the LOCK_rpl_thread locked. */ } else if ((gtid_ev->flags & Gtid_log_event::FL_GROUP_COMMIT_ID) && @@ -464,8 +553,8 @@ rpl_parallel::do_event(Relay_log_info *rli, Log_event *ev, THD *parent_thd) servers in the replication hierarchy. */ rpl_parallel_thread *rpt= global_rpl_thread_pool.get_thread(e); - rpt->wait_for= cur_thread; /* ToDo */ - mysql_mutex_unlock(&cur_thread->LOCK_rpl_thread); + rgi->wait_commit_sub_id= e->current_sub_id; + rgi->wait_commit_group_info= e->current_group_info; e->rpl_thread= cur_thread= rpt; /* get_thread() returns with the LOCK_rpl_thread locked. */ } @@ -476,18 +565,25 @@ rpl_parallel::do_event(Relay_log_info *rli, Log_event *ev, THD *parent_thd) domain, and we have to wait for that to finish before we can start on the next one. So just re-use the thread.
*/ + rgi->wait_commit_sub_id= 0; } - current= e; + e->current_sub_id= rgi->gtid_sub_id; + current= rgi->parallel_entry= e; + } + else if (!Log_event::is_group_event(typ) || !current) + { + /* + Events like ROTATE and FORMAT_DESCRIPTION. Do not run in worker thread. + Same for events not preceeded by GTID (we should not see those normally, + but they might be from an old master). + */ + qev->rgi= serial_rgi; + rpt_handle_event(qev, parent_thd, NULL); + return false; } else { - if (!current) - { - /* We have no domain_id yet, just run non-parallel. */ - rpt_handle_event(qev, parent_thd, NULL); - return false; - } cur_thread= current->rpl_thread; if (cur_thread) { @@ -503,9 +599,10 @@ rpl_parallel::do_event(Relay_log_info *rli, Log_event *ev, THD *parent_thd) { cur_thread= current->rpl_thread= global_rpl_thread_pool.get_thread(current); - cur_thread->wait_for= NULL; /* ToDo */ } } + qev->rgi= current->current_group_info; + /* Queue the event for processing. */ diff --git a/sql/rpl_parallel.h b/sql/rpl_parallel.h index c5bb39cb6fc..b0367efdea6 100644 --- a/sql/rpl_parallel.h +++ b/sql/rpl_parallel.h @@ -25,7 +25,6 @@ struct rpl_parallel_thread { Log_event *ev; struct rpl_group_info *rgi; } *event_queue, *last_in_queue; - rpl_parallel_thread *wait_for; /* ToDo: change this ... */ }; @@ -52,6 +51,14 @@ struct rpl_parallel_entry { uint64 last_commit_id; bool active; rpl_parallel_thread *rpl_thread; + /* + The sub_id of the last transaction to commit within this domain_id. + Must be accessed under LOCK_parallel_entry protection. 
+ */ + uint64 last_committed_sub_id; + mysql_mutex_t LOCK_parallel_entry; + uint64 current_sub_id; + struct rpl_group_info *current_group_info; }; struct rpl_parallel { HASH domain_hash; @@ -60,7 +67,7 @@ struct rpl_parallel { rpl_parallel(); ~rpl_parallel(); rpl_parallel_entry *find(uint32 domain_id); - bool do_event(Relay_log_info *rli, Log_event *ev, THD *thd); + bool do_event(struct rpl_group_info *serial_rgi, Log_event *ev, THD *thd); }; diff --git a/sql/rpl_rli.cc b/sql/rpl_rli.cc index 5d5bca1189c..264c0b2cc22 100644 --- a/sql/rpl_rli.cc +++ b/sql/rpl_rli.cc @@ -113,8 +113,6 @@ Relay_log_info::~Relay_log_info() mysql_cond_destroy(&sleep_cond); relay_log.cleanup(); free_annotate_event(); - if (group_info) - my_free(group_info); DBUG_VOID_RETURN; } @@ -1532,4 +1530,28 @@ end: DBUG_RETURN(err); } + +rpl_group_info::rpl_group_info(Relay_log_info *rli_) + : rli(rli_), gtid_sub_id(0), wait_commit_sub_id(0), wait_commit_group_info(0), + parallel_entry(0) +{ + bzero(&current_gtid, sizeof(current_gtid)); +} + + +int +event_group_new_gtid(rpl_group_info *rgi, Gtid_log_event *gev) +{ + uint64 sub_id= rpl_global_gtid_slave_state.next_subid(gev->domain_id); + if (!sub_id) + { + return 1; + } + rgi->gtid_sub_id= sub_id; + rgi->current_gtid.server_id= gev->server_id; + rgi->current_gtid.domain_id= gev->domain_id; + rgi->current_gtid.seq_no= gev->seq_no; + return 0; +} + #endif diff --git a/sql/rpl_rli.h b/sql/rpl_rli.h index c02ae6e3adb..f1f96344c65 100644 --- a/sql/rpl_rli.h +++ b/sql/rpl_rli.h @@ -314,7 +314,7 @@ public: char slave_patternload_file[FN_REFLEN]; size_t slave_patternload_file_size; - /* Various data related to the currently executing event group. */ + /* ToDo: We need to remove this, always use the per-transaction one to work with parallel replication. */ struct rpl_group_info *group_info; rpl_parallel parallel; @@ -610,6 +610,30 @@ struct rpl_group_info */ uint64 gtid_sub_id; rpl_gtid current_gtid; + /* + This is used to keep transaction commit order.
+ We will signal this when we commit, and can register it to wait for the + commit_orderer of the previous commit to signal us. + */ + wait_for_commit commit_orderer; + /* + If non-zero, the sub_id of a prior event group whose commit we have to wait + for before committing ourselves. Then wait_commit_group_info points to the + event group to wait for. + + Before using this, rpl_parallel_entry::last_committed_sub_id should be + compared against wait_commit_sub_id. Only if last_committed_sub_id is + smaller than wait_commit_sub_id must the wait be done (otherwise the + waited-for transaction is already committed, so we would otherwise wait + for the wrong commit). + */ + uint64 wait_commit_sub_id; + struct rpl_group_info *wait_commit_group_info; + + struct rpl_parallel_entry *parallel_entry; + + rpl_group_info(Relay_log_info *rli); + ~rpl_group_info() { }; }; @@ -620,5 +644,6 @@ int init_relay_log_info(Relay_log_info* rli, const char* info_fname); extern struct rpl_slave_state rpl_global_gtid_slave_state; int rpl_load_gtid_slave_state(THD *thd); +int event_group_new_gtid(rpl_group_info *rgi, Gtid_log_event *gev); #endif /* RPL_RLI_H */ diff --git a/sql/slave.cc b/sql/slave.cc index ace5c7f837b..072ec90076d 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -3177,7 +3177,8 @@ int apply_event_and_update_pos(Log_event* ev, THD* thd, @retval 1 The event was not applied. */ -static int exec_relay_log_event(THD* thd, Relay_log_info* rli) +static int exec_relay_log_event(THD* thd, Relay_log_info* rli, + rpl_group_info *serial_rgi) { DBUG_ENTER("exec_relay_log_event"); @@ -3201,6 +3202,7 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli) if (ev) { int exec_res; + Log_event_type typ= ev->get_type_code(); /* This tests if the position of the beginning of the current event @@ -3230,8 +3232,8 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli) read hanging if the realy log does not have any more events. 
*/ DBUG_EXECUTE_IF("incomplete_group_in_relay_log", - if ((ev->get_type_code() == XID_EVENT) || - ((ev->get_type_code() == QUERY_EVENT) && + if ((typ == XID_EVENT) || + ((typ == QUERY_EVENT) && strcmp("COMMIT", ((Query_log_event *) ev)->query) == 0)) { DBUG_ASSERT(thd->transaction.all.modified_non_trans_table); @@ -3244,11 +3246,25 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli) } if (opt_slave_parallel_threads > 0) - DBUG_RETURN(rli->parallel.do_event(rli, ev, thd)); + DBUG_RETURN(rli->parallel.do_event(serial_rgi, ev, thd)); + + /* + For GTID, allocate a new sub_id for the given domain_id. + The sub_id must be allocated in increasing order of binlog order. + */ + if (typ == GTID_EVENT && + event_group_new_gtid(serial_rgi, static_cast<Gtid_log_event *>(ev))) + { + sql_print_error("Error reading relay log event: %s", + "slave SQL thread aborted because of out-of-memory error"); + mysql_mutex_unlock(&rli->data_lock); + delete ev; + DBUG_RETURN(1); + } exec_res= apply_event_and_update_pos(ev, thd, rli->group_info, NULL); - switch (ev->get_type_code()) { + switch (typ) { case FORMAT_DESCRIPTION_EVENT: /* Format_description_log_event should not be deleted because it @@ -4001,6 +4017,7 @@ pthread_handler_t handle_slave_sql(void *arg) Master_info *mi= ((Master_info*)arg); Relay_log_info* rli = &mi->rli; const char *errmsg; + rpl_group_info serial_rgi(rli); // needs to call my_thread_init(), otherwise we get a coredump in DBUG_ stuff my_thread_init(); @@ -4205,6 +4222,13 @@ log '%s' at position %s, relay log '%s' position: %s%s", RPL_LOG_NAME, } mysql_mutex_unlock(&rli->data_lock); + /* + ToDo: Get rid of this, all accesses to rpl_group_info must be made + per-worker-thread to work with parallel replication.
+ */ + if (opt_slave_parallel_threads <= 0) + rli->group_info= &serial_rgi; + /* Read queries from the IO/THREAD until this thread is killed */ while (!sql_slave_killed(thd,rli)) @@ -4227,7 +4251,7 @@ log '%s' at position %s, relay log '%s' position: %s%s", RPL_LOG_NAME, saved_skip= 0; } - if (exec_relay_log_event(thd,rli)) + if (exec_relay_log_event(thd, rli, &serial_rgi)) { DBUG_PRINT("info", ("exec_relay_log_event() failed")); // do not scare the user if SQL thread was simply killed or stopped @@ -5736,7 +5760,6 @@ static Log_event* next_event(Relay_log_info* rli) mysql_mutex_t *log_lock = rli->relay_log.get_log_lock(); const char* errmsg=0; THD* thd = rli->sql_thd; - struct rpl_group_info *rgi; DBUG_ENTER("next_event"); DBUG_ASSERT(thd != 0); @@ -5824,45 +5847,12 @@ static Log_event* next_event(Relay_log_info* rli) opt_slave_sql_verify_checksum))) { - if (!(rgi= rli->group_info)) - { - if (!(rgi= rli->group_info= (struct rpl_group_info *) - my_malloc(sizeof(*rgi), MYF(0)))) - { - errmsg = "slave SQL thread aborted because of out-of-memory error"; - if (hot_log) - mysql_mutex_unlock(log_lock); - goto err; - } - bzero(rgi, sizeof(*rgi)); - } - rgi->rli= rli; DBUG_ASSERT(thd==rli->sql_thd); /* read it while we have a lock, to avoid a mutex lock in inc_event_relay_log_pos() */ rli->future_event_relay_log_pos= my_b_tell(cur_log); - /* - For GTID, allocate a new sub_id for the given domain_id. - The sub_id must be allocated in increasing order of binlog order. 
- */ - if (ev->get_type_code() == GTID_EVENT) - { - Gtid_log_event *gev= static_cast<Gtid_log_event *>(ev); - uint64 sub_id= rpl_global_gtid_slave_state.next_subid(gev->domain_id); - if (!sub_id) - { - errmsg = "slave SQL thread aborted because of out-of-memory error"; - if (hot_log) - mysql_mutex_unlock(log_lock); - goto err; - } - rgi->gtid_sub_id= sub_id; - rgi->current_gtid.server_id= gev->server_id; - rgi->current_gtid.domain_id= gev->domain_id; - rgi->current_gtid.seq_no= gev->seq_no; - } if (hot_log) mysql_mutex_unlock(log_lock); diff --git a/sql/sql_class.cc b/sql/sql_class.cc index fa53b38ab70..aec65dc385c 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -5602,6 +5602,13 @@ wait_for_commit::wait_for_commit() } +wait_for_commit::~wait_for_commit() +{ + mysql_mutex_destroy(&LOCK_wait_commit); + mysql_cond_destroy(&COND_wait_commit); +} + + void wait_for_commit::wakeup() { diff --git a/sql/sql_class.h b/sql/sql_class.h index 4e1917f62b7..3b7cfb42ec7 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -1659,6 +1659,7 @@ struct wait_for_commit void unregister_wait_for_prior_commit2(); wait_for_commit(); + ~wait_for_commit(); }; From 592e464a021747d7ac5b13222f5de1cd4250531c Mon Sep 17 00:00:00 2001 From: unknown Date: Wed, 3 Jul 2013 19:03:21 +0200 Subject: [PATCH 07/41] MDEV-4506: Parallel replication. Intermediate commit. Pass down rpl_group_info * to remove one instance of non-threadsafe use of rli->group_info.
--- sql/log_event.cc | 48 +++++++++++++++++++++++++++----------------- sql/log_event.h | 28 +++++++++++++------------- sql/log_event_old.cc | 11 ++++++---- sql/log_event_old.h | 18 ++++++++--------- sql/rpl_gtid.cc | 5 ++--- sql/rpl_gtid.h | 2 +- sql/rpl_rli.cc | 6 ++++-- sql/rpl_rli.h | 3 ++- sql/slave.cc | 4 ++-- 9 files changed, 71 insertions(+), 54 deletions(-) diff --git a/sql/log_event.cc b/sql/log_event.cc index 8bbc43dec35..1f8685e34b8 100644 --- a/sql/log_event.cc +++ b/sql/log_event.cc @@ -937,8 +937,9 @@ Log_event::Log_event(const char* buf, #ifndef MYSQL_CLIENT #ifdef HAVE_REPLICATION -int Log_event::do_update_pos(Relay_log_info *rli) +int Log_event::do_update_pos(struct rpl_group_info *rgi) { + Relay_log_info *rli= rgi->rli; /* rli is null when (as far as I (Guilhem) know) the caller is Load_log_event::do_apply_event *and* that one is called from @@ -967,7 +968,7 @@ int Log_event::do_update_pos(Relay_log_info *rli) (is_artificial_event() && IF_DBUG(debug_not_change_ts_if_art_event > 0, 1) ? 
0 : when), - thd); + thd, rgi); DBUG_EXECUTE_IF("let_first_flush_log_change_timestamp", if (debug_not_change_ts_if_art_event == 0) debug_not_change_ts_if_art_event= 2; ); @@ -4243,8 +4244,9 @@ end: DBUG_RETURN(thd->is_slave_error); } -int Query_log_event::do_update_pos(Relay_log_info *rli) +int Query_log_event::do_update_pos(struct rpl_group_info *rgi) { + Relay_log_info *rli= rgi->rli; /* Note that we will not increment group* positions if we are just after a SET ONE_SHOT, because SET ONE_SHOT should not be separated @@ -4256,7 +4258,7 @@ int Query_log_event::do_update_pos(Relay_log_info *rli) return 0; } else - return Log_event::do_update_pos(rli); + return Log_event::do_update_pos(rgi); } @@ -4865,8 +4867,9 @@ int Format_description_log_event::do_apply_event(struct rpl_group_info *rgi) DBUG_RETURN(ret); } -int Format_description_log_event::do_update_pos(Relay_log_info *rli) +int Format_description_log_event::do_update_pos(struct rpl_group_info *rgi) { + Relay_log_info *rli= rgi->rli; if (server_id == (uint32) global_system_variables.server_id) { /* @@ -4887,7 +4890,7 @@ int Format_description_log_event::do_update_pos(Relay_log_info *rli) } else { - return Log_event::do_update_pos(rli); + return Log_event::do_update_pos(rgi); } } @@ -5916,8 +5919,9 @@ bool Rotate_log_event::write(IO_CACHE* file) @retval 0 ok */ -int Rotate_log_event::do_update_pos(Relay_log_info *rli) +int Rotate_log_event::do_update_pos(struct rpl_group_info *rgi) { + Relay_log_info *rli= rgi->rli; DBUG_ENTER("Rotate_log_event::do_update_pos"); #ifndef DBUG_OFF char buf[32]; @@ -5962,7 +5966,7 @@ int Rotate_log_event::do_update_pos(Relay_log_info *rli) rli->group_master_log_name, (ulong) rli->group_master_log_pos)); mysql_mutex_unlock(&rli->data_lock); - rpl_global_gtid_slave_state.record_and_update_gtid(thd, rli); + rpl_global_gtid_slave_state.record_and_update_gtid(thd, rgi); flush_relay_log_info(rli); /* @@ -6291,8 +6295,9 @@ Gtid_log_event::do_apply_event(struct rpl_group_info *rgi) int 
-Gtid_log_event::do_update_pos(Relay_log_info *rli) +Gtid_log_event::do_update_pos(struct rpl_group_info *rgi) { + Relay_log_info *rli= rgi->rli; rli->inc_event_relay_log_pos(); return 0; } @@ -6726,8 +6731,9 @@ int Intvar_log_event::do_apply_event(struct rpl_group_info *rgi) return 0; } -int Intvar_log_event::do_update_pos(Relay_log_info *rli) +int Intvar_log_event::do_update_pos(struct rpl_group_info *rgi) { + Relay_log_info *rli= rgi->rli; rli->inc_event_relay_log_pos(); return 0; } @@ -6829,8 +6835,9 @@ int Rand_log_event::do_apply_event(struct rpl_group_info *rgi) return 0; } -int Rand_log_event::do_update_pos(Relay_log_info *rli) +int Rand_log_event::do_update_pos(struct rpl_group_info *rgi) { + Relay_log_info *rli= rgi->rli; rli->inc_event_relay_log_pos(); return 0; } @@ -7498,8 +7505,9 @@ int User_var_log_event::do_apply_event(struct rpl_group_info *rgi) DBUG_RETURN(0); } -int User_var_log_event::do_update_pos(Relay_log_info *rli) +int User_var_log_event::do_update_pos(struct rpl_group_info *rgi) { + Relay_log_info *rli= rgi->rli; rli->inc_event_relay_log_pos(); return 0; } @@ -7718,8 +7726,9 @@ void Stop_log_event::print(FILE* file, PRINT_EVENT_INFO* print_event_info) Start_log_event_v3::do_apply_event(), not here. Because if we come here, the master was sane. */ -int Stop_log_event::do_update_pos(Relay_log_info *rli) +int Stop_log_event::do_update_pos(struct rpl_group_info *rgi) { + Relay_log_info *rli= rgi->rli; /* We do not want to update master_log pos because we get a rotate event before stop, so by now group_master_log_name is set to the next log. 
@@ -7731,7 +7740,7 @@ int Stop_log_event::do_update_pos(Relay_log_info *rli) rli->inc_event_relay_log_pos(); else { - rpl_global_gtid_slave_state.record_and_update_gtid(thd, rli); + rpl_global_gtid_slave_state.record_and_update_gtid(thd, rgi); rli->inc_group_relay_log_pos(0); flush_relay_log_info(rli); } @@ -9529,8 +9538,9 @@ static int rows_event_stmt_cleanup(Relay_log_info const *rli, THD * thd) @retval non-zero Error in the statement commit */ int -Rows_log_event::do_update_pos(Relay_log_info *rli) +Rows_log_event::do_update_pos(struct rpl_group_info *rgi) { + Relay_log_info *rli= rgi->rli; DBUG_ENTER("Rows_log_event::do_update_pos"); int error= 0; @@ -9544,7 +9554,7 @@ Rows_log_event::do_update_pos(Relay_log_info *rli) Step the group log position if we are not in a transaction, otherwise increase the event log position. */ - rli->stmt_done(log_pos, when, thd); + rli->stmt_done(log_pos, when, thd, rgi); /* Clear any errors in thd->net.last_err*. It is not known if this is needed or not. 
It is believed that any errors that may exist in @@ -9777,8 +9787,9 @@ int Annotate_rows_log_event::do_apply_event(struct rpl_group_info *rgi) #endif #if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) -int Annotate_rows_log_event::do_update_pos(Relay_log_info *rli) +int Annotate_rows_log_event::do_update_pos(struct rpl_group_info *rgi) { + Relay_log_info *rli= rgi->rli; rli->inc_event_relay_log_pos(); return 0; } @@ -10404,8 +10415,9 @@ Table_map_log_event::do_shall_skip(Relay_log_info *rli) return continue_group(rli); } -int Table_map_log_event::do_update_pos(Relay_log_info *rli) +int Table_map_log_event::do_update_pos(struct rpl_group_info *rgi) { + Relay_log_info *rli= rgi->rli; rli->inc_event_relay_log_pos(); return 0; } diff --git a/sql/log_event.h b/sql/log_event.h index 491666e2fdb..8bda493a7ec 100644 --- a/sql/log_event.h +++ b/sql/log_event.h @@ -1331,9 +1331,9 @@ public: @see do_update_pos */ - int update_pos(Relay_log_info *rli) + int update_pos(struct rpl_group_info *rgi) { - return do_update_pos(rli); + return do_update_pos(rgi); } /** @@ -1461,7 +1461,7 @@ protected: 1). Observe that handler errors are returned by the do_apply_event() function, and not by this one. */ - virtual int do_update_pos(Relay_log_info *rli); + virtual int do_update_pos(struct rpl_group_info *rgi); /** @@ -1987,7 +1987,7 @@ public: /* !!! 
Public in this patch to allow old usage */ #if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) virtual enum_skip_reason do_shall_skip(Relay_log_info *rli); virtual int do_apply_event(struct rpl_group_info *rgi); - virtual int do_update_pos(Relay_log_info *rli); + virtual int do_update_pos(struct rpl_group_info *rgi); int do_apply_event(struct rpl_group_info *rgi, const char *query_arg, @@ -2597,7 +2597,7 @@ public: protected: #if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) virtual int do_apply_event(struct rpl_group_info *rgi); - virtual int do_update_pos(Relay_log_info *rli); + virtual int do_update_pos(struct rpl_group_info *rgi); virtual enum_skip_reason do_shall_skip(Relay_log_info *rli); #endif }; @@ -2676,7 +2676,7 @@ Intvar_log_event(THD* thd_arg,uchar type_arg, ulonglong val_arg, private: #if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) virtual int do_apply_event(struct rpl_group_info *rgi); - virtual int do_update_pos(Relay_log_info *rli); + virtual int do_update_pos(struct rpl_group_info *rgi); virtual enum_skip_reason do_shall_skip(Relay_log_info *rli); #endif }; @@ -2755,7 +2755,7 @@ class Rand_log_event: public Log_event private: #if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) virtual int do_apply_event(struct rpl_group_info *rgi); - virtual int do_update_pos(Relay_log_info *rli); + virtual int do_update_pos(struct rpl_group_info *rgi); virtual enum_skip_reason do_shall_skip(Relay_log_info *rli); #endif }; @@ -2871,7 +2871,7 @@ public: private: #if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) virtual int do_apply_event(struct rpl_group_info *rgi); - virtual int do_update_pos(Relay_log_info *rli); + virtual int do_update_pos(struct rpl_group_info *rgi); virtual enum_skip_reason do_shall_skip(Relay_log_info *rli); #endif }; @@ -2905,7 +2905,7 @@ public: private: #if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) - virtual int do_update_pos(Relay_log_info *rli); + virtual int do_update_pos(struct rpl_group_info *rgi); 
virtual enum_skip_reason do_shall_skip(Relay_log_info *rli) { /* @@ -3007,7 +3007,7 @@ public: private: #if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) - virtual int do_update_pos(Relay_log_info *rli); + virtual int do_update_pos(struct rpl_group_info *rgi); virtual enum_skip_reason do_shall_skip(Relay_log_info *rli); #endif }; @@ -3120,7 +3120,7 @@ public: #ifdef HAVE_REPLICATION void pack_info(THD *thd, Protocol *protocol); virtual int do_apply_event(struct rpl_group_info *rgi); - virtual int do_update_pos(Relay_log_info *rli); + virtual int do_update_pos(struct rpl_group_info *rgi); virtual enum_skip_reason do_shall_skip(Relay_log_info *rli); #endif #else @@ -3636,7 +3636,7 @@ public: #if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) private: virtual int do_apply_event(struct rpl_group_info *rgi); - virtual int do_update_pos(Relay_log_info*); + virtual int do_update_pos(struct rpl_group_info *rgi); virtual enum_skip_reason do_shall_skip(Relay_log_info*); #endif @@ -4051,7 +4051,7 @@ public: private: #if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) virtual int do_apply_event(struct rpl_group_info *rgi); - virtual int do_update_pos(Relay_log_info *rli); + virtual int do_update_pos(struct rpl_group_info *rgi); virtual enum_skip_reason do_shall_skip(Relay_log_info *rli); #endif @@ -4279,7 +4279,7 @@ private: #if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) virtual int do_apply_event(struct rpl_group_info *rgi); - virtual int do_update_pos(Relay_log_info *rli); + virtual int do_update_pos(struct rpl_group_info *rgi); virtual enum_skip_reason do_shall_skip(Relay_log_info *rli); /* diff --git a/sql/log_event_old.cc b/sql/log_event_old.cc index 698118e3bda..4be3e2720de 100644 --- a/sql/log_event_old.cc +++ b/sql/log_event_old.cc @@ -36,12 +36,13 @@ // Old implementation of do_apply_event() int -Old_rows_log_event::do_apply_event(Old_rows_log_event *ev, const Relay_log_info *rli) +Old_rows_log_event::do_apply_event(Old_rows_log_event *ev, struct 
rpl_group_info *rgi) { DBUG_ENTER("Old_rows_log_event::do_apply_event(st_relay_log_info*)"); int error= 0; THD *ev_thd= ev->thd; uchar const *row_start= ev->m_rows_buf; + const Relay_log_info *rli= rgi->rli; /* If m_table_id == ~0UL, then we have a dummy event that does not @@ -1450,10 +1451,11 @@ int Old_rows_log_event::do_add_row_data(uchar *row_data, size_t length) #if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) -int Old_rows_log_event::do_apply_event(Relay_log_info const *rli) +int Old_rows_log_event::do_apply_event(struct rpl_group_info *rgi) { DBUG_ENTER("Old_rows_log_event::do_apply_event(Relay_log_info*)"); int error= 0; + Relay_log_info const *rli= rgi->rli; /* If m_table_id == ~0UL, then we have a dummy event that does not @@ -1832,8 +1834,9 @@ Old_rows_log_event::do_shall_skip(Relay_log_info *rli) } int -Old_rows_log_event::do_update_pos(Relay_log_info *rli) +Old_rows_log_event::do_update_pos(struct rpl_group_info *rgi) { + Relay_log_info *rli= rgi->rli; DBUG_ENTER("Old_rows_log_event::do_update_pos"); int error= 0; @@ -1847,7 +1850,7 @@ Old_rows_log_event::do_update_pos(Relay_log_info *rli) Step the group log position if we are not in a transaction, otherwise increase the event log position. */ - rli->stmt_done(log_pos, when, thd); + rli->stmt_done(log_pos, when, thd, rgi); /* Clear any errors in thd->net.last_err*. It is not known if this is needed or not. 
It is believed that any errors that may exist in diff --git a/sql/log_event_old.h b/sql/log_event_old.h index 3e1efd8e2c0..ad51349ef80 100644 --- a/sql/log_event_old.h +++ b/sql/log_event_old.h @@ -214,8 +214,8 @@ protected: private: #if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) - virtual int do_apply_event(Relay_log_info const *rli); - virtual int do_update_pos(Relay_log_info *rli); + virtual int do_apply_event(struct rpl_group_info *rgi); + virtual int do_update_pos(struct rpl_group_info *rgi); virtual enum_skip_reason do_shall_skip(Relay_log_info *rli); /* @@ -275,7 +275,7 @@ private: #if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) - int do_apply_event(Old_rows_log_event*,const Relay_log_info*); + int do_apply_event(Old_rows_log_event*, struct rpl_group_info *rgi); /* Primitive to prepare for a sequence of row executions. @@ -403,8 +403,8 @@ private: #if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) // use old definition of do_apply_event() - virtual int do_apply_event(const Relay_log_info *rli) - { return Old_rows_log_event::do_apply_event(this,rli); } + virtual int do_apply_event(struct rpl_group_info *rgi) + { return Old_rows_log_event::do_apply_event(this, rgi); } // primitives for old version of do_apply_event() virtual int do_before_row_operations(TABLE *table); @@ -481,8 +481,8 @@ private: #if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) // use old definition of do_apply_event() - virtual int do_apply_event(const Relay_log_info *rli) - { return Old_rows_log_event::do_apply_event(this,rli); } + virtual int do_apply_event(struct rpl_group_info *rgi) + { return Old_rows_log_event::do_apply_event(this, rgi); } // primitives for old version of do_apply_event() virtual int do_before_row_operations(TABLE *table); @@ -556,8 +556,8 @@ private: #if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) // use old definition of do_apply_event() - virtual int do_apply_event(const Relay_log_info *rli) - { return 
Old_rows_log_event::do_apply_event(this,rli); } + virtual int do_apply_event(struct rpl_group_info *rgi) + { return Old_rows_log_event::do_apply_event(this, rgi); } // primitives for old version of do_apply_event() virtual int do_before_row_operations(TABLE *table); diff --git a/sql/rpl_gtid.cc b/sql/rpl_gtid.cc index 54d3b704a2c..bc826e9bdb5 100644 --- a/sql/rpl_gtid.cc +++ b/sql/rpl_gtid.cc @@ -62,16 +62,15 @@ rpl_slave_state::update_state_hash(uint64 sub_id, rpl_gtid *gtid) int -rpl_slave_state::record_and_update_gtid(THD *thd, Relay_log_info *rli) +rpl_slave_state::record_and_update_gtid(THD *thd, struct rpl_group_info *rgi) { uint64 sub_id; - struct rpl_group_info *rgi; /* Update the GTID position, if we have it and did not already update it in a GTID transaction. */ - if ((rgi= rli->group_info) && (sub_id= rgi->gtid_sub_id)) + if ((sub_id= rgi->gtid_sub_id)) { rgi->gtid_sub_id= 0; if (record_gtid(thd, &rgi->current_gtid, sub_id, false, false)) diff --git a/sql/rpl_gtid.h b/sql/rpl_gtid.h index 4d5302020bf..525b34cb160 100644 --- a/sql/rpl_gtid.h +++ b/sql/rpl_gtid.h @@ -108,7 +108,7 @@ struct rpl_slave_state int put_back_list(uint32 domain_id, list_element *list); void update_state_hash(uint64 sub_id, rpl_gtid *gtid); - int record_and_update_gtid(THD *thd, Relay_log_info *rli); + int record_and_update_gtid(THD *thd, struct rpl_group_info *rgi); }; diff --git a/sql/rpl_rli.cc b/sql/rpl_rli.cc index 264c0b2cc22..bbf10dbcd51 100644 --- a/sql/rpl_rli.cc +++ b/sql/rpl_rli.cc @@ -1194,13 +1194,15 @@ bool Relay_log_info::cached_charset_compare(char *charset) const void Relay_log_info::stmt_done(my_off_t event_master_log_pos, - time_t event_creation_time, THD *thd) + time_t event_creation_time, THD *thd, + struct rpl_group_info *rgi) { #ifndef DBUG_OFF extern uint debug_not_change_ts_if_art_event; #endif clear_flag(IN_STMT); + DBUG_ASSERT(rgi->rli == this); /* If in a transaction, and if the slave supports transactions, just inc_event_relay_log_pos(). 
We only have to check for OPTION_BEGIN @@ -1229,7 +1231,7 @@ void Relay_log_info::stmt_done(my_off_t event_master_log_pos, else { inc_group_relay_log_pos(event_master_log_pos); - if (rpl_global_gtid_slave_state.record_and_update_gtid(thd, this)) + if (rpl_global_gtid_slave_state.record_and_update_gtid(thd, rgi)) { report(WARNING_LEVEL, ER_CANNOT_UPDATE_GTID_STATE, "Failed to update GTID state in %s.%s, slave state may become " diff --git a/sql/rpl_rli.h b/sql/rpl_rli.h index f1f96344c65..b4daecadea8 100644 --- a/sql/rpl_rli.h +++ b/sql/rpl_rli.h @@ -458,7 +458,8 @@ public: the Seconds_behind_master field. */ void stmt_done(my_off_t event_log_pos, - time_t event_creation_time, THD *thd); + time_t event_creation_time, THD *thd, + struct rpl_group_info *rgi); /** diff --git a/sql/slave.cc b/sql/slave.cc index 072ec90076d..ba4fef03639 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -3108,7 +3108,7 @@ int apply_event_and_update_pos(Log_event* ev, THD* thd, DBUG_PRINT("info", ("apply_event error = %d", exec_res)); if (exec_res == 0) { - int error= ev->update_pos(rli); + int error= ev->update_pos(rgi); #ifdef HAVE_valgrind if (!rli->is_fake) #endif @@ -3262,7 +3262,7 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli, DBUG_RETURN(1); } - exec_res= apply_event_and_update_pos(ev, thd, rli->group_info, NULL); + exec_res= apply_event_and_update_pos(ev, thd, serial_rgi, NULL); switch (typ) { case FORMAT_DESCRIPTION_EVENT: From a1cfd473469171e5a9700dbff0ee0e1eb84d6312 Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 4 Jul 2013 09:20:56 +0200 Subject: [PATCH 08/41] MDEV-4506: Parallel replication: Intermediate commit. Wait for all worker threads to finish when stopping the SQL thread. (Only a basic wait; this still needs to be fixed to include timeout logic as in sql_slave_killed()). 
--- mysql-test/suite/rpl/t/rpl_parallel.test | 3 +-- sql/mysqld.cc | 6 ++++-- sql/mysqld.h | 3 ++- sql/rpl_parallel.cc | 25 ++++++++++++++++++++++++ sql/rpl_parallel.h | 3 +++ sql/slave.cc | 3 +++ 6 files changed, 38 insertions(+), 5 deletions(-) diff --git a/mysql-test/suite/rpl/t/rpl_parallel.test b/mysql-test/suite/rpl/t/rpl_parallel.test index 5748218dc10..3ace346e006 100644 --- a/mysql-test/suite/rpl/t/rpl_parallel.test +++ b/mysql-test/suite/rpl/t/rpl_parallel.test @@ -44,10 +44,9 @@ query_vertical SHOW SLAVE STATUS; --source include/start_slave.inc SELECT * FROM t1; ---sleep 1 -SELECT * FROM t1; --source include/stop_slave.inc +SELECT * FROM t1; --connection s1 SET sql_log_bin=0; diff --git a/sql/mysqld.cc b/sql/mysqld.cc index 52c754993ac..816756338a4 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -894,7 +894,8 @@ PSI_cond_key key_RELAYLOG_update_cond, key_COND_wakeup_ready, key_COND_wait_commit; PSI_cond_key key_RELAYLOG_COND_queue_busy; PSI_cond_key key_TC_LOG_MMAP_COND_queue_busy; -PSI_cond_key key_COND_rpl_thread, key_COND_rpl_thread_pool; +PSI_cond_key key_COND_rpl_thread, key_COND_rpl_thread_pool, + key_COND_parallel_entry; static PSI_cond_info all_server_conds[]= { @@ -938,7 +939,8 @@ static PSI_cond_info all_server_conds[]= { &key_COND_thread_cache, "COND_thread_cache", PSI_FLAG_GLOBAL}, { &key_COND_flush_thread_cache, "COND_flush_thread_cache", PSI_FLAG_GLOBAL}, { &key_COND_rpl_thread, "COND_rpl_thread", 0}, - { &key_COND_rpl_thread_pool, "COND_rpl_thread_pool", 0} + { &key_COND_rpl_thread_pool, "COND_rpl_thread_pool", 0}, + { &key_COND_parallel_entry, "COND_parallel_entry", 0} }; PSI_thread_key key_thread_bootstrap, key_thread_delayed_insert, diff --git a/sql/mysqld.h b/sql/mysqld.h index d3b17cfefe1..3475835c67b 100644 --- a/sql/mysqld.h +++ b/sql/mysqld.h @@ -283,7 +283,8 @@ extern PSI_cond_key key_RELAYLOG_update_cond, key_COND_wakeup_ready, key_COND_wait_commit; extern PSI_cond_key key_RELAYLOG_COND_queue_busy; extern PSI_cond_key 
key_TC_LOG_MMAP_COND_queue_busy; -extern PSI_cond_key key_COND_rpl_thread, key_COND_rpl_thread_pool; +extern PSI_cond_key key_COND_rpl_thread, key_COND_rpl_thread_pool, + key_COND_parallel_entry; extern PSI_thread_key key_thread_bootstrap, key_thread_delayed_insert, key_thread_handle_manager, key_thread_kill_server, key_thread_main, diff --git a/sql/rpl_parallel.cc b/sql/rpl_parallel.cc index 1a6eb9e3d50..21c3dcf6d90 100644 --- a/sql/rpl_parallel.cc +++ b/sql/rpl_parallel.cc @@ -21,6 +21,9 @@ the logic in sql_slave_killed() that waits for current event group to complete needs to be extended appropriately... + - Audit the use of Relay_log_info::data_lock. Make sure it is held + correctly in all needed places also when using parallel replication. + - We need some user-configurable limit on how far ahead the SQL thread will fetch and queue events for parallel execution (otherwise if slave gets behind we will fill up memory with pending malloc()'ed events). @@ -194,7 +197,11 @@ handle_rpl_parallel_thread(void *arg) */ mysql_mutex_lock(&entry->LOCK_parallel_entry); if (entry->last_committed_sub_id < rgi->gtid_sub_id) + { entry->last_committed_sub_id= rgi->gtid_sub_id; + if (entry->need_signal) + mysql_cond_broadcast(&entry->COND_parallel_entry); + } mysql_mutex_unlock(&entry->LOCK_parallel_entry); rgi->commit_orderer.wakeup_subsequent_commits(); @@ -463,12 +470,30 @@ rpl_parallel::find(uint32 domain_id) } mysql_mutex_init(key_LOCK_parallel_entry, &e->LOCK_parallel_entry, MY_MUTEX_INIT_FAST); + mysql_cond_init(key_COND_parallel_entry, &e->COND_parallel_entry, NULL); } return e; } +void +rpl_parallel::wait_for_done() +{ + struct rpl_parallel_entry *e; + uint32 i; + + for (i= 0; i < domain_hash.records; ++i) + { + e= (struct rpl_parallel_entry *)my_hash_element(&domain_hash, i); + mysql_mutex_lock(&e->LOCK_parallel_entry); + while (e->current_sub_id > e->last_commit_id) + mysql_cond_wait(&e->COND_parallel_entry, &e->LOCK_parallel_entry); + 
mysql_mutex_unlock(&e->LOCK_parallel_entry); + } +} + + bool rpl_parallel::do_event(struct rpl_group_info *serial_rgi, Log_event *ev, THD *parent_thd) diff --git a/sql/rpl_parallel.h b/sql/rpl_parallel.h index b0367efdea6..09bde20f5af 100644 --- a/sql/rpl_parallel.h +++ b/sql/rpl_parallel.h @@ -50,6 +50,7 @@ struct rpl_parallel_entry { uint64 last_seq_no; uint64 last_commit_id; bool active; + bool need_signal; rpl_parallel_thread *rpl_thread; /* The sub_id of the last transaction to commit within this domain_id. @@ -57,6 +58,7 @@ struct rpl_parallel_entry { */ uint64 last_committed_sub_id; mysql_mutex_t LOCK_parallel_entry; + mysql_cond_t COND_parallel_entry; uint64 current_sub_id; struct rpl_group_info *current_group_info; }; @@ -67,6 +69,7 @@ struct rpl_parallel { rpl_parallel(); ~rpl_parallel(); rpl_parallel_entry *find(uint32 domain_id); + void wait_for_done(); bool do_event(struct rpl_group_info *serial_rgi, Log_event *ev, THD *thd); }; diff --git a/sql/slave.cc b/sql/slave.cc index ba4fef03639..9b3df653384 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -4342,6 +4342,9 @@ the slave SQL thread with \"SLAVE START\". We stopped at log \ rli->executed_entries++; } + if (opt_slave_parallel_threads > 0) + rli->parallel.wait_for_done(); + /* Thread stopped. Print the current replication position to the log */ { String tmp; From b5a496a777fd5c6f50a25cde852b86c74d18ee7a Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 4 Jul 2013 13:17:01 +0200 Subject: [PATCH 09/41] MDEV-4506: Parallel replication: Intermediate commit. Fix some bugs around waiting for worker threads to end during SQL slave stop. Free Log_event after parallel execution (still needs to be made thread-safe by using rpl_group_info rather than rli). 
--- sql/rpl_parallel.cc | 79 ++++++++++++++++++++++++++------------------- sql/rpl_rli.cc | 46 ++++++++++++++++++++++++++ sql/rpl_rli.h | 2 ++ sql/slave.cc | 44 ++++++------------------- 4 files changed, 102 insertions(+), 69 deletions(-) diff --git a/sql/rpl_parallel.cc b/sql/rpl_parallel.cc index 21c3dcf6d90..e5c700041ef 100644 --- a/sql/rpl_parallel.cc +++ b/sql/rpl_parallel.cc @@ -16,6 +16,7 @@ - Error handling. If we fail in one of multiple parallel executions, we need to make a best effort to complete prior transactions and roll back following transactions, so slave binlog position will be correct. + And all the retry logic for temporary errors like deadlock. - Stopping the slave needs to handle stopping all parallel executions. And the logic in sql_slave_killed() that waits for current event group to @@ -73,7 +74,6 @@ rpt_handle_event(rpl_parallel_thread::queued_event *qev, mysql_mutex_lock(&rli->data_lock); err= apply_event_and_update_pos(qev->ev, thd, rgi, rpt); /* ToDo: error handling. */ - /* ToDo: also free qev->ev, or hold on to it for a bit if necessary. */ } @@ -85,6 +85,7 @@ handle_rpl_parallel_thread(void *arg) struct rpl_parallel_thread::queued_event *events; bool group_standalone= true; bool in_event_group= false; + uint64 event_gtid_sub_id= 0; struct rpl_parallel_thread *rpt= (struct rpl_parallel_thread *)arg; @@ -142,6 +143,7 @@ handle_rpl_parallel_thread(void *arg) rpl_group_info *rgi= events->rgi; rpl_parallel_entry *entry= rgi->parallel_entry; uint64 wait_for_sub_id; + bool end_of_group; if (event_type == GTID_EVENT) { @@ -150,6 +152,9 @@ handle_rpl_parallel_thread(void *arg) (0 != (static_cast(events->ev)->flags2 & Gtid_log_event::FL_STANDALONE)); + /* Save this, as it gets cleared once event group commits. 
*/ + event_gtid_sub_id= rgi->gtid_sub_id; + /* Register ourself to wait for the previous commit, if we need to do such registration _and_ that previous commit has not already @@ -173,43 +178,47 @@ handle_rpl_parallel_thread(void *arg) rpt_handle_event(events, thd, rpt); - if (in_event_group) + end_of_group= + in_event_group && + ((group_standalone && !Log_event::is_part_of_group(event_type)) || + event_type == XID_EVENT || + (event_type == QUERY_EVENT && + (!strcmp("COMMIT", ((Query_log_event *)events->ev)->query) || + !strcmp("ROLLBACK", ((Query_log_event *)events->ev)->query)))); + + /* ToDo: must use rgi here, not rli, for thread safety. */ + delete_or_keep_event_post_apply(rgi->rli, event_type, events->ev); + my_free(events); + + if (end_of_group) { - if ((group_standalone && !Log_event::is_part_of_group(event_type)) || - event_type == XID_EVENT || - (event_type == QUERY_EVENT && - (!strcmp("COMMIT", ((Query_log_event *)events->ev)->query) || - !strcmp("ROLLBACK", ((Query_log_event *)events->ev)->query)))) + in_event_group= false; + + rgi->commit_orderer.unregister_wait_for_prior_commit(); + thd->wait_for_commit_ptr= NULL; + + /* + Record that we have finished, so other event groups will no + longer attempt to wait for us to commit. + + We can race here with the next transactions, but that is fine, as + long as we check that we do not decrease last_committed_sub_id. If + this commit is done, then any prior commits will also have been + done and also no longer need waiting for. + */ + mysql_mutex_lock(&entry->LOCK_parallel_entry); + if (entry->last_committed_sub_id < event_gtid_sub_id) { - in_event_group= false; - - rgi->commit_orderer.unregister_wait_for_prior_commit(); - thd->wait_for_commit_ptr= NULL; - - /* - Record that we have finished, so other event groups will no - longer attempt to wait for us to commit. - - We can race here with the next transactions, but that is fine, as - long as we check that we do not decrease last_committed_sub_id. 
If - this commit is done, then any prior commits will also have been - done and also no longer need waiting for. - */ - mysql_mutex_lock(&entry->LOCK_parallel_entry); - if (entry->last_committed_sub_id < rgi->gtid_sub_id) - { - entry->last_committed_sub_id= rgi->gtid_sub_id; - if (entry->need_signal) - mysql_cond_broadcast(&entry->COND_parallel_entry); - } - mysql_mutex_unlock(&entry->LOCK_parallel_entry); - - rgi->commit_orderer.wakeup_subsequent_commits(); - delete rgi; + entry->last_committed_sub_id= event_gtid_sub_id; + if (entry->need_signal) + mysql_cond_broadcast(&entry->COND_parallel_entry); } + mysql_mutex_unlock(&entry->LOCK_parallel_entry); + + rgi->commit_orderer.wakeup_subsequent_commits(); + delete rgi; } - my_free(events); events= next; } @@ -487,7 +496,7 @@ rpl_parallel::wait_for_done() { e= (struct rpl_parallel_entry *)my_hash_element(&domain_hash, i); mysql_mutex_lock(&e->LOCK_parallel_entry); - while (e->current_sub_id > e->last_commit_id) + while (e->current_sub_id > e->last_committed_sub_id) mysql_cond_wait(&e->COND_parallel_entry, &e->LOCK_parallel_entry); mysql_mutex_unlock(&e->LOCK_parallel_entry); } @@ -605,6 +614,8 @@ rpl_parallel::do_event(struct rpl_group_info *serial_rgi, Log_event *ev, */ qev->rgi= serial_rgi; rpt_handle_event(qev, parent_thd, NULL); + delete_or_keep_event_post_apply(rli, typ, qev->ev); + return false; } else diff --git a/sql/rpl_rli.cc b/sql/rpl_rli.cc index bbf10dbcd51..f189f9adffa 100644 --- a/sql/rpl_rli.cc +++ b/sql/rpl_rli.cc @@ -1556,4 +1556,50 @@ event_group_new_gtid(rpl_group_info *rgi, Gtid_log_event *gev) return 0; } + +void +delete_or_keep_event_post_apply(Relay_log_info *rli, + Log_event_type typ, Log_event *ev) +{ + /* + ToDo: This needs to work on rpl_group_info, not Relay_log_info, to be + thread-safe for parallel replication. 
+ */ + + switch (typ) { + case FORMAT_DESCRIPTION_EVENT: + /* + Format_description_log_event should not be deleted because it + will be used to read info about the relay log's format; + it will be deleted when the SQL thread does not need it, + i.e. when this thread terminates. + */ + break; + case ANNOTATE_ROWS_EVENT: + /* + Annotate_rows event should not be deleted because after it has + been applied, thd->query points to the string inside this event. + The thd->query will be used to generate new Annotate_rows event + during applying the subsequent Rows events. + */ + rli->set_annotate_event((Annotate_rows_log_event*) ev); + break; + case DELETE_ROWS_EVENT: + case UPDATE_ROWS_EVENT: + case WRITE_ROWS_EVENT: + /* + After the last Rows event has been applied, the saved Annotate_rows + event (if any) is not needed anymore and can be deleted. + */ + if (((Rows_log_event*)ev)->get_flags(Rows_log_event::STMT_END_F)) + rli->free_annotate_event(); + /* fall through */ + default: + DBUG_PRINT("info", ("Deleting the event after it has been executed")); + if (!rli->is_deferred_event(ev)) + delete ev; + break; + } +} + #endif diff --git a/sql/rpl_rli.h b/sql/rpl_rli.h index b4daecadea8..c22773f9810 100644 --- a/sql/rpl_rli.h +++ b/sql/rpl_rli.h @@ -646,5 +646,7 @@ extern struct rpl_slave_state rpl_global_gtid_slave_state; int rpl_load_gtid_slave_state(THD *thd); int event_group_new_gtid(rpl_group_info *rgi, Gtid_log_event *gev); +void delete_or_keep_event_post_apply(Relay_log_info *rli, + Log_event_type typ, Log_event *ev); #endif /* RPL_RLI_H */ diff --git a/sql/slave.cc b/sql/slave.cc index 9b3df653384..474a6f902d2 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -3264,41 +3264,7 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli, exec_res= apply_event_and_update_pos(ev, thd, serial_rgi, NULL); - switch (typ) { - case FORMAT_DESCRIPTION_EVENT: - /* - Format_description_log_event should not be deleted because it - will be used to read info about the relay log's 
format; - it will be deleted when the SQL thread does not need it, - i.e. when this thread terminates. - */ - break; - case ANNOTATE_ROWS_EVENT: - /* - Annotate_rows event should not be deleted because after it has - been applied, thd->query points to the string inside this event. - The thd->query will be used to generate new Annotate_rows event - during applying the subsequent Rows events. - */ - rli->set_annotate_event((Annotate_rows_log_event*) ev); - break; - case DELETE_ROWS_EVENT: - case UPDATE_ROWS_EVENT: - case WRITE_ROWS_EVENT: - /* - After the last Rows event has been applied, the saved Annotate_rows - event (if any) is not needed anymore and can be deleted. - */ - if (((Rows_log_event*)ev)->get_flags(Rows_log_event::STMT_END_F)) - rli->free_annotate_event(); - /* fall through */ - default: - DBUG_PRINT("info", ("Deleting the event after it has been executed")); - if (!rli->is_deferred_event(ev)) - delete ev; - break; - } - + delete_or_keep_event_post_apply(rli, typ, ev); /* update_log_pos failed: this should not happen, so we don't @@ -4363,6 +4329,14 @@ the slave SQL thread with \"SLAVE START\". We stopped at log \ err: + /* + Once again, in case we aborted with an error and skipped the first one. + (We want the first one to be before the printout of stop position to + get the correct position printed.) + */ + if (opt_slave_parallel_threads > 0) + rli->parallel.wait_for_done(); + /* Some events set some playgrounds, which won't be cleared because thread stops. Stopping of this thread may not be known to these events ("stop" From e654be3865d7c8a6ad6339b2de2c45f02c9f7981 Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 5 Jul 2013 00:26:15 +0200 Subject: [PATCH 10/41] MDEV-4506: Parallel replication: Intermediate commit. Implement options --binlog-commit-wait-count and --binlog-commit-wait-usec. These options permit the DBA to deliberately increase latency of an individual commit to get more transactions in each binlog group commit.
This increases the opportunity for parallel replication on the slave, and can also decrease I/O load on the master. The options also make it easier to test the parallel replication with mysql-test-run. --- mysql-test/suite/rpl/t/rpl_parallel2.test | 70 +++++++++++++++++++++++ sql/log.cc | 47 +++++++++++++++ sql/log.h | 3 + sql/mysqld.cc | 9 ++- sql/mysqld.h | 2 + sql/rpl_parallel.cc | 3 +- sql/rpl_parallel.h | 1 - sql/sys_vars.cc | 20 +++++++ 8 files changed, 150 insertions(+), 5 deletions(-) create mode 100644 mysql-test/suite/rpl/t/rpl_parallel2.test diff --git a/mysql-test/suite/rpl/t/rpl_parallel2.test b/mysql-test/suite/rpl/t/rpl_parallel2.test new file mode 100644 index 00000000000..b3f970c909a --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_parallel2.test @@ -0,0 +1,70 @@ +--source include/have_binlog_format_statement.inc +--source include/have_xtradb.inc + +connect (m1,127.0.0.1,root,,test,$MASTER_MYPORT,); +connect (m2,127.0.0.1,root,,test,$MASTER_MYPORT,); +connect (m3,127.0.0.1,root,,test,$MASTER_MYPORT,); +connect (m4,127.0.0.1,root,,test,$MASTER_MYPORT,); +connect (s1,127.0.0.1,root,,test,$SLAVE_MYPORT,); +connect (s2,127.0.0.1,root,,test,$SLAVE_MYPORT,); +connect (s3,127.0.0.1,root,,test,$SLAVE_MYPORT,); +connect (s4,127.0.0.1,root,,test,$SLAVE_MYPORT,); + +--connection m1 +SELECT @@server_id; +SET sql_log_bin=0; +CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=InnoDB; +SET sql_log_bin=1; +SET @old_count= @@GLOBAL.binlog_commit_wait_count; +SET @old_usec= @@GLOBAL.binlog_commit_wait_usec; +SET GLOBAL binlog_commit_wait_usec = 30*1000000; + +--connection s1 +SELECT @@server_id; +SET sql_log_bin=0; +CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=InnoDB; +SET sql_log_bin=1; + +--replace_result $MASTER_MYPORT MASTER_PORT +eval CHANGE MASTER TO master_host = '127.0.0.1', master_port = $MASTER_MYPORT, + master_user='root', master_use_gtid=current_pos; + +--connection m1 +SET GLOBAL binlog_commit_wait_count = 4; + +send INSERT INTO t1 VALUES (1); + +--connection m2 
+send INSERT INTO t1 VALUES (2); +--connection m3 +send INSERT INTO t1 VALUES (3); +--connection m4 +INSERT INTO t1 VALUES (4); +--connection m1 +reap; +--connection m2 +reap; +--connection m3 +reap; + +--connection m1 +SHOW BINLOG EVENTS; + +--connection s1 +--source include/start_slave.inc +SELECT * FROM t1; +--source include/stop_slave.inc +SELECT * FROM t1; + +--connection m1 +SET sql_log_bin=0; +DROP TABLE t1; +SET sql_log_bin=1; +SET GLOBAL binlog_commit_wait_count= @old_count; +SET GLOBAL binlog_commit_wait_usec= @old_usec; + +--connection s1 +RESET SLAVE ALL; +SET sql_log_bin=0; +DROP TABLE t1; +SET sql_log_bin=1; diff --git a/sql/log.cc b/sql/log.cc index e3eb5f9a331..61d4428fc18 100644 --- a/sql/log.cc +++ b/sql/log.cc @@ -88,6 +88,7 @@ ulong opt_binlog_dbug_fsync_sleep= 0; #endif mysql_mutex_t LOCK_prepare_ordered; +mysql_cond_t COND_prepare_ordered; mysql_mutex_t LOCK_commit_ordered; static ulonglong binlog_status_var_num_commits; @@ -6679,6 +6680,8 @@ MYSQL_BIN_LOG::queue_for_group_commit(group_commit_entry *entry, } } + if (opt_binlog_commit_wait_count > 0) + mysql_cond_signal(&COND_prepare_ordered); mysql_mutex_unlock(&LOCK_prepare_ordered); DEBUG_SYNC(entry->thd, "commit_after_release_LOCK_prepare_ordered"); @@ -6840,6 +6843,8 @@ MYSQL_BIN_LOG::trx_group_commit_leader(group_commit_entry *leader) binlog_id= current_binlog_id; mysql_mutex_lock(&LOCK_prepare_ordered); + if (opt_binlog_commit_wait_count) + wait_for_sufficient_commits(); current= group_commit_queue; group_commit_queue= NULL; mysql_mutex_unlock(&LOCK_prepare_ordered); @@ -7135,6 +7140,48 @@ MYSQL_BIN_LOG::write_transaction_or_stmt(group_commit_entry *entry, return 0; } + +void +MYSQL_BIN_LOG::wait_for_sufficient_commits() +{ + size_t count; + group_commit_entry *e; + group_commit_entry *last_head; + struct timespec wait_until; + + mysql_mutex_assert_owner(&LOCK_log); + mysql_mutex_assert_owner(&LOCK_prepare_ordered); + + count= 0; + for (e= last_head= group_commit_queue; e; e= e->next) + 
++count; + if (count >= opt_binlog_commit_wait_count) + return; + + mysql_mutex_unlock(&LOCK_log); + set_timespec_nsec(wait_until, (ulonglong)1000*opt_binlog_commit_wait_usec); + + for (;;) + { + int err; + group_commit_entry *head; + + err= mysql_cond_timedwait(&COND_prepare_ordered, &LOCK_prepare_ordered, + &wait_until); + if (err == ETIMEDOUT) + break; + head= group_commit_queue; + for (e= head; e && e != last_head; e= e->next) + ++count; + if (count >= opt_binlog_commit_wait_count) + break; + last_head= head; + } + + mysql_mutex_lock(&LOCK_log); +} + + /** Wait until we get a signal that the relay log has been updated. diff --git a/sql/log.h b/sql/log.h index 2345f0acf9c..48cc568da11 100644 --- a/sql/log.h +++ b/sql/log.h @@ -85,9 +85,11 @@ protected: prepare_ordered() or commit_ordered() methods. */ extern mysql_mutex_t LOCK_prepare_ordered; +extern mysql_cond_t COND_prepare_ordered; extern mysql_mutex_t LOCK_commit_ordered; #ifdef HAVE_PSI_INTERFACE extern PSI_mutex_key key_LOCK_prepare_ordered, key_LOCK_commit_ordered; +extern PSI_cond_key key_COND_prepare_ordered; #endif class TC_LOG_DUMMY: public TC_LOG // use it to disable the logging @@ -685,6 +687,7 @@ public: } void set_max_size(ulong max_size_arg); void signal_update(); + void wait_for_sufficient_commits(); void wait_for_update_relay_log(THD* thd); int wait_for_update_bin_log(THD* thd, const struct timespec * timeout); void init(ulong max_size); diff --git a/sql/mysqld.cc b/sql/mysqld.cc index 816756338a4..a7fa78838a9 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -544,6 +544,8 @@ ulong rpl_recovery_rank=0; ulong stored_program_cache_size= 0; ulong opt_slave_parallel_threads= 0; +ulong opt_binlog_commit_wait_count= 0; +ulong opt_binlog_commit_wait_usec= 0; const double log_10[] = { 1e000, 1e001, 1e002, 1e003, 1e004, 1e005, 1e006, 1e007, 1e008, 1e009, @@ -895,7 +897,7 @@ PSI_cond_key key_RELAYLOG_update_cond, key_COND_wakeup_ready, PSI_cond_key key_RELAYLOG_COND_queue_busy; PSI_cond_key 
key_TC_LOG_MMAP_COND_queue_busy; PSI_cond_key key_COND_rpl_thread, key_COND_rpl_thread_pool, - key_COND_parallel_entry; + key_COND_parallel_entry, key_COND_prepare_ordered; static PSI_cond_info all_server_conds[]= { @@ -940,7 +942,8 @@ static PSI_cond_info all_server_conds[]= { &key_COND_flush_thread_cache, "COND_flush_thread_cache", PSI_FLAG_GLOBAL}, { &key_COND_rpl_thread, "COND_rpl_thread", 0}, { &key_COND_rpl_thread_pool, "COND_rpl_thread_pool", 0}, - { &key_COND_parallel_entry, "COND_parallel_entry", 0} + { &key_COND_parallel_entry, "COND_parallel_entry", 0}, + { &key_COND_prepare_ordered, "COND_prepare_ordered", 0} }; PSI_thread_key key_thread_bootstrap, key_thread_delayed_insert, @@ -2017,6 +2020,7 @@ static void clean_up_mutexes() mysql_mutex_destroy(&LOCK_server_started); mysql_cond_destroy(&COND_server_started); mysql_mutex_destroy(&LOCK_prepare_ordered); + mysql_cond_destroy(&COND_prepare_ordered); mysql_mutex_destroy(&LOCK_commit_ordered); DBUG_VOID_RETURN; } @@ -4117,6 +4121,7 @@ static int init_thread_environment() &LOCK_rpl_gtid_state, MY_MUTEX_INIT_SLOW); mysql_mutex_init(key_LOCK_prepare_ordered, &LOCK_prepare_ordered, MY_MUTEX_INIT_SLOW); + mysql_cond_init(key_COND_prepare_ordered, &COND_prepare_ordered, NULL); mysql_mutex_init(key_LOCK_commit_ordered, &LOCK_commit_ordered, MY_MUTEX_INIT_SLOW); diff --git a/sql/mysqld.h b/sql/mysqld.h index 3475835c67b..345e9fa74c9 100644 --- a/sql/mysqld.h +++ b/sql/mysqld.h @@ -177,6 +177,8 @@ extern ulong opt_binlog_rows_event_max_size; extern ulong rpl_recovery_rank, thread_cache_size; extern ulong stored_program_cache_size; extern ulong opt_slave_parallel_threads; +extern ulong opt_binlog_commit_wait_count; +extern ulong opt_binlog_commit_wait_usec; extern ulong back_log; extern ulong executed_events; extern char language[FN_REFLEN]; diff --git a/sql/rpl_parallel.cc b/sql/rpl_parallel.cc index e5c700041ef..8f97c19e5ad 100644 --- a/sql/rpl_parallel.cc +++ b/sql/rpl_parallel.cc @@ -210,8 +210,7 @@ 
handle_rpl_parallel_thread(void *arg) if (entry->last_committed_sub_id < event_gtid_sub_id) { entry->last_committed_sub_id= event_gtid_sub_id; - if (entry->need_signal) - mysql_cond_broadcast(&entry->COND_parallel_entry); + mysql_cond_broadcast(&entry->COND_parallel_entry); } mysql_mutex_unlock(&entry->LOCK_parallel_entry); diff --git a/sql/rpl_parallel.h b/sql/rpl_parallel.h index 09bde20f5af..a84722e9263 100644 --- a/sql/rpl_parallel.h +++ b/sql/rpl_parallel.h @@ -50,7 +50,6 @@ struct rpl_parallel_entry { uint64 last_seq_no; uint64 last_commit_id; bool active; - bool need_signal; rpl_parallel_thread *rpl_thread; /* The sub_id of the last transaction to commit within this domain_id. diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc index f63960a4e36..1273bff1750 100644 --- a/sql/sys_vars.cc +++ b/sql/sys_vars.cc @@ -1483,6 +1483,26 @@ static Sys_var_ulong Sys_slave_parallel_threads( #endif +static Sys_var_ulong Sys_binlog_commit_wait_count( + "binlog_commit_wait_count", + "If non-zero, binlog write will wait at most binlog_commit_wait_usec " + "microseconds for at least this many commits to queue up for group " + "commit to the binlog. This can reduce I/O on the binlog and provide " + "increased opportunity for parallel apply on the slave, but too high " + "a value will decrease commit throughput.", + GLOBAL_VAR(opt_binlog_commit_wait_count), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, ULONG_MAX), DEFAULT(0), BLOCK_SIZE(1)); + + +static Sys_var_ulong Sys_binlog_commit_wait_usec( + "binlog_commit_wait_usec", + "Maximum time, in microseconds, to wait for more commits to queue up " + " for binlog group commit. Only takes effect if the value of " + "binlog_commit_wait_count is non-zero.", + GLOBAL_VAR(opt_binlog_commit_wait_usec), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, ULONG_MAX), DEFAULT(100000), BLOCK_SIZE(1)); + + static bool fix_max_join_size(sys_var *self, THD *thd, enum_var_type type) { SV *sv= type == OPT_GLOBAL ? 
&global_system_variables : &thd->variables; From a99356fbe72fbca61617edabc5a8928da4343c96 Mon Sep 17 00:00:00 2001 From: unknown Date: Mon, 8 Jul 2013 16:47:07 +0200 Subject: [PATCH 11/41] MDEV-4506: Parallel replication: intermediate commit. Fix a bunch of issues found with locking, ordering, and non-thread-safe stuff in Relay_log_info. Now able to do a simple benchmark, showing 4.5 times speedup for applying a binlog with 10000 REPLACE statements. --- sql/log.h | 2 +- sql/log_event.cc | 2 +- sql/log_event_old.cc | 4 +- sql/rpl_parallel.cc | 132 +++++++++++++++++++++++++------------------ sql/rpl_parallel.h | 11 +++- sql/rpl_rli.cc | 8 +-- sql/rpl_rli.h | 12 +++- sql/slave.cc | 3 +- 8 files changed, 109 insertions(+), 65 deletions(-) diff --git a/sql/log.h b/sql/log.h index 48cc568da11..efb560dc245 100644 --- a/sql/log.h +++ b/sql/log.h @@ -408,7 +408,7 @@ private: class binlog_cache_mngr; struct rpl_gtid; -class wait_for_commit; +struct wait_for_commit; class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG { private: diff --git a/sql/log_event.cc b/sql/log_event.cc index 1f8685e34b8..cb7bc3924f5 100644 --- a/sql/log_event.cc +++ b/sql/log_event.cc @@ -9101,7 +9101,7 @@ int Rows_log_event::do_apply_event(struct rpl_group_info *rgi) do_apply_event(). We still check here to prevent future coding errors. */ - DBUG_ASSERT(rli->sql_thd == thd); + DBUG_ASSERT(rgi->thd == thd); /* If there is no locks taken, this is the first binrow event seen diff --git a/sql/log_event_old.cc b/sql/log_event_old.cc index 4be3e2720de..d3e9d47d64a 100644 --- a/sql/log_event_old.cc +++ b/sql/log_event_old.cc @@ -68,7 +68,7 @@ Old_rows_log_event::do_apply_event(Old_rows_log_event *ev, struct rpl_group_info do_apply_event(). We still check here to prevent future coding errors. 
*/ - DBUG_ASSERT(rli->sql_thd == ev_thd); + DBUG_ASSERT(rgi->thd == ev_thd); /* If there is no locks taken, this is the first binrow event seen @@ -1481,7 +1481,7 @@ int Old_rows_log_event::do_apply_event(struct rpl_group_info *rgi) do_apply_event(). We still check here to prevent future coding errors. */ - DBUG_ASSERT(rli->sql_thd == thd); + DBUG_ASSERT(rgi->thd == thd); /* If there is no locks taken, this is the first binrow event seen diff --git a/sql/rpl_parallel.cc b/sql/rpl_parallel.cc index 8f97c19e5ad..63066d8d7c0 100644 --- a/sql/rpl_parallel.cc +++ b/sql/rpl_parallel.cc @@ -38,15 +38,16 @@ everything needs to be correctly rolled back and stopped in all threads, to ensure a consistent slave replication state. - - We need some knob on the master to allow the user to deliberately delay - commits waiting for more transactions to join group commit, to increase - potential for parallel execution on the slave. - - Handle the case of a partial event group. This occurs when the master crashes in the middle of writing the event group to the binlog. The slave rolls back the transaction; parallel execution needs to be able to deal with this wrt. commit_orderer and such. + - Relay_log_info::is_in_group(). This needs to be handled correctly in all + callers. I think it needs to be split into two, one version in + Relay_log_info to be used from next_event() in slave.cc, one to be used in + per-transaction stuff. + - We should fail if we connect to the master with opt_slave_parallel_threads greater than zero and master does not support GTID. 
Just to avoid a bunch of potential problems, we won't be able to do any parallel replication @@ -58,12 +59,12 @@ struct rpl_parallel_thread_pool global_rpl_thread_pool; static void rpt_handle_event(rpl_parallel_thread::queued_event *qev, - THD *thd, struct rpl_parallel_thread *rpt) { int err; struct rpl_group_info *rgi= qev->rgi; Relay_log_info *rli= rgi->rli; + THD *thd= rgi->thd; thd->rli_slave= rli; thd->rpl_filter = rli->mi->rpl_filter; @@ -143,6 +144,7 @@ handle_rpl_parallel_thread(void *arg) rpl_group_info *rgi= events->rgi; rpl_parallel_entry *entry= rgi->parallel_entry; uint64 wait_for_sub_id; + uint64 wait_start_sub_id; bool end_of_group; if (event_type == GTID_EVENT) @@ -155,14 +157,28 @@ handle_rpl_parallel_thread(void *arg) /* Save this, as it gets cleared once event group commits. */ event_gtid_sub_id= rgi->gtid_sub_id; + rgi->thd= thd; + /* Register ourself to wait for the previous commit, if we need to do such registration _and_ that previous commit has not already occured. + + Also do not start parallel execution of this event group until all + prior groups have committed that are not safe to run in parallel with. */ - if ((wait_for_sub_id= rgi->wait_commit_sub_id)) + wait_for_sub_id= rgi->wait_commit_sub_id; + wait_start_sub_id= rgi->wait_start_sub_id; + if (wait_for_sub_id || wait_start_sub_id) { mysql_mutex_lock(&entry->LOCK_parallel_entry); + if (wait_start_sub_id) + { + while (wait_start_sub_id > entry->last_committed_sub_id) + mysql_cond_wait(&entry->COND_parallel_entry, + &entry->LOCK_parallel_entry); + } + rgi->wait_start_sub_id= 0; /* No need to check again. 
*/ if (wait_for_sub_id > entry->last_committed_sub_id) { wait_for_commit *waitee= @@ -176,7 +192,7 @@ handle_rpl_parallel_thread(void *arg) thd->wait_for_commit_ptr= &rgi->commit_orderer; } - rpt_handle_event(events, thd, rpt); + rpt_handle_event(events, rpt); end_of_group= in_event_group && @@ -376,6 +392,7 @@ err: while (new_free_list->running) mysql_cond_wait(&new_free_list->COND_rpl_thread, &new_free_list->LOCK_rpl_thread); + mysql_mutex_unlock(&new_free_list->LOCK_rpl_thread); my_free(new_free_list); new_free_list= next; } @@ -503,8 +520,7 @@ rpl_parallel::wait_for_done() bool -rpl_parallel::do_event(struct rpl_group_info *serial_rgi, Log_event *ev, - THD *parent_thd) +rpl_parallel::do_event(struct rpl_group_info *serial_rgi, Log_event *ev) { rpl_parallel_entry *e; rpl_parallel_thread *cur_thread; @@ -530,51 +546,15 @@ rpl_parallel::do_event(struct rpl_group_info *serial_rgi, Log_event *ev, Gtid_log_event *gtid_ev= static_cast(ev); if (!(e= find(gtid_ev->domain_id)) || - !(e->current_group_info= rgi= new rpl_group_info(rli)) || + !(rgi= new rpl_group_info(rli)) || event_group_new_gtid(rgi, gtid_ev)) { my_error(ER_OUT_OF_RESOURCES, MYF(MY_WME)); return true; } - /* Check if we already have a worker thread for this entry. */ - cur_thread= e->rpl_thread; - if (cur_thread) - { - mysql_mutex_lock(&cur_thread->LOCK_rpl_thread); - if (cur_thread->current_entry != e) - { - /* Not ours anymore, we need to grab a new one. */ - mysql_mutex_unlock(&cur_thread->LOCK_rpl_thread); - e->rpl_thread= cur_thread= NULL; - } - } - - if (!cur_thread) - { - /* - Nothing else is currently running in this domain. We can spawn a new - thread to do this event group in parallel with anything else that might - be running in other domains. 
- */ - if (gtid_ev->flags & Gtid_log_event::FL_GROUP_COMMIT_ID) - { - e->last_server_id= gtid_ev->server_id; - e->last_seq_no= gtid_ev->seq_no; - e->last_commit_id= gtid_ev->commit_id; - } - else - { - e->last_server_id= 0; - e->last_seq_no= 0; - e->last_commit_id= 0; - } - cur_thread= e->rpl_thread= global_rpl_thread_pool.get_thread(e); - rgi->wait_commit_sub_id= 0; - /* get_thread() returns with the LOCK_rpl_thread locked. */ - } - else if ((gtid_ev->flags & Gtid_log_event::FL_GROUP_COMMIT_ID) && - e->last_commit_id == gtid_ev->commit_id) + if ((gtid_ev->flags2 & Gtid_log_event::FL_GROUP_COMMIT_ID) && + e->last_commit_id == gtid_ev->commit_id) { /* We are already executing something else in this domain. But the two @@ -588,19 +568,63 @@ rpl_parallel::do_event(struct rpl_group_info *serial_rgi, Log_event *ev, rpl_parallel_thread *rpt= global_rpl_thread_pool.get_thread(e); rgi->wait_commit_sub_id= e->current_sub_id; rgi->wait_commit_group_info= e->current_group_info; + rgi->wait_start_sub_id= e->prev_groupcommit_sub_id; e->rpl_thread= cur_thread= rpt; /* get_thread() returns with the LOCK_rpl_thread locked. */ } else { - /* - We are still executing the previous event group for this replication - domain, and we have to wait for that to finish before we can start on - the next one. So just re-use the thread. - */ + /* Check if we already have a worker thread for this entry. */ + cur_thread= e->rpl_thread; + if (cur_thread) + { + mysql_mutex_lock(&cur_thread->LOCK_rpl_thread); + if (cur_thread->current_entry != e) + { + /* Not ours anymore, we need to grab a new one. */ + mysql_mutex_unlock(&cur_thread->LOCK_rpl_thread); + e->rpl_thread= cur_thread= NULL; + } + } + + if (!cur_thread) + { + /* + Nothing else is currently running in this domain. We can spawn a new + thread to do this event group in parallel with anything else that might + be running in other domains. 
+ */ + cur_thread= e->rpl_thread= global_rpl_thread_pool.get_thread(e); + /* get_thread() returns with the LOCK_rpl_thread locked. */ + } + else + { + /* + We are still executing the previous event group for this replication + domain, and we have to wait for that to finish before we can start on + the next one. So just re-use the thread. + */ + } + rgi->wait_commit_sub_id= 0; + rgi->wait_start_sub_id= 0; + e->prev_groupcommit_sub_id= e->current_sub_id; } + if (gtid_ev->flags2 & Gtid_log_event::FL_GROUP_COMMIT_ID) + { + e->last_server_id= gtid_ev->server_id; + e->last_seq_no= gtid_ev->seq_no; + e->last_commit_id= gtid_ev->commit_id; + } + else + { + e->last_server_id= 0; + e->last_seq_no= 0; + e->last_commit_id= 0; + } + + e->current_group_info= rgi; e->current_sub_id= rgi->gtid_sub_id; current= rgi->parallel_entry= e; } @@ -612,7 +636,7 @@ rpl_parallel::do_event(struct rpl_group_info *serial_rgi, Log_event *ev, but they might be from an old master). */ qev->rgi= serial_rgi; - rpt_handle_event(qev, parent_thd, NULL); + rpt_handle_event(qev, NULL); delete_or_keep_event_post_apply(rli, typ, qev->ev); return false; diff --git a/sql/rpl_parallel.h b/sql/rpl_parallel.h index a84722e9263..304263c3477 100644 --- a/sql/rpl_parallel.h +++ b/sql/rpl_parallel.h @@ -60,6 +60,15 @@ struct rpl_parallel_entry { mysql_cond_t COND_parallel_entry; uint64 current_sub_id; struct rpl_group_info *current_group_info; + /* + The sub_id of the last event group in the previous batch of group-committed + transactions. + + When we spawn parallel worker threads for the next group-committed batch, + they first need to wait for this sub_id to be committed before it is safe + to start executing them. 
+ */ + uint64 prev_groupcommit_sub_id; }; struct rpl_parallel { HASH domain_hash; @@ -69,7 +78,7 @@ struct rpl_parallel { ~rpl_parallel(); rpl_parallel_entry *find(uint32 domain_id); void wait_for_done(); - bool do_event(struct rpl_group_info *serial_rgi, Log_event *ev, THD *thd); + bool do_event(struct rpl_group_info *serial_rgi, Log_event *ev); }; diff --git a/sql/rpl_rli.cc b/sql/rpl_rli.cc index f189f9adffa..8fb22266d5e 100644 --- a/sql/rpl_rli.cc +++ b/sql/rpl_rli.cc @@ -1226,7 +1226,7 @@ void Relay_log_info::stmt_done(my_off_t event_master_log_pos, middle of the "transaction". START SLAVE will resume at BEGIN while the MyISAM table has already been updated. */ - if ((sql_thd->variables.option_bits & OPTION_BEGIN) && opt_using_transactions) + if ((rgi->thd->variables.option_bits & OPTION_BEGIN) && opt_using_transactions) inc_event_relay_log_pos(); else { @@ -1267,7 +1267,7 @@ void Relay_log_info::cleanup_context(THD *thd, bool error) { DBUG_ENTER("Relay_log_info::cleanup_context"); - DBUG_ASSERT(sql_thd == thd); + DBUG_ASSERT(opt_slave_parallel_threads > 0 || sql_thd == thd); /* 1) Instances of Table_map_log_event, if ::do_apply_event() was called on them, may have opened tables, which we cannot be sure have been closed (because @@ -1534,8 +1534,8 @@ end: rpl_group_info::rpl_group_info(Relay_log_info *rli_) - : rli(rli_), gtid_sub_id(0), wait_commit_sub_id(0), wait_commit_group_info(0), - parallel_entry(0) + : rli(rli_), thd(0), gtid_sub_id(0), wait_commit_sub_id(0), + wait_commit_group_info(0), wait_start_sub_id(0), parallel_entry(0) { bzero(¤t_gtid, sizeof(current_gtid)); } diff --git a/sql/rpl_rli.h b/sql/rpl_rli.h index c22773f9810..294f2ba885a 100644 --- a/sql/rpl_rli.h +++ b/sql/rpl_rli.h @@ -604,6 +604,7 @@ private: struct rpl_group_info { Relay_log_info *rli; + THD *thd; /* Current GTID being processed. The sub_id gives the binlog order within one domain_id. 
A zero sub_id @@ -630,10 +631,19 @@ struct rpl_group_info */ uint64 wait_commit_sub_id; struct rpl_group_info *wait_commit_group_info; + /* + If non-zero, the event group must wait for this sub_id to be committed + before the execution of the event group is allowed to start. + + (When we execute in parallel the transactions that group committed + together on the master, we still need to wait for any prior transactions + to have commtted). + */ + uint64 wait_start_sub_id; struct rpl_parallel_entry *parallel_entry; - rpl_group_info(Relay_log_info *rli); + rpl_group_info(Relay_log_info *rli_); ~rpl_group_info() { }; }; diff --git a/sql/slave.cc b/sql/slave.cc index 474a6f902d2..b9ef3172364 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -3246,7 +3246,7 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli, } if (opt_slave_parallel_threads > 0) - DBUG_RETURN(rli->parallel.do_event(serial_rgi, ev, thd)); + DBUG_RETURN(rli->parallel.do_event(serial_rgi, ev)); /* For GTID, allocate a new sub_id for the given domain_id. @@ -3995,6 +3995,7 @@ pthread_handler_t handle_slave_sql(void *arg) thd = new THD; // note that contructor of THD uses DBUG_ ! thd->thread_stack = (char*)&thd; // remember where our stack is thd->rpl_filter = mi->rpl_filter; + serial_rgi.thd= thd; DBUG_ASSERT(rli->inited); DBUG_ASSERT(rli->mi == mi); From 6d5f237e091ca7aa4fdd52c186af11fffc80b1c2 Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 9 Jul 2013 13:15:53 +0200 Subject: [PATCH 12/41] MDEV-4506: Parallel replication: Intermediate commit. Fix a number of failures in the test suite. 
--- mysql-test/r/mysqld--help.result | 19 +++++++++++++ .../suite/perfschema/r/all_instances.result | 3 ++ .../perfschema/r/dml_setup_instruments.result | 8 +++--- .../r/binlog_commit_wait_count_basic.result | 13 +++++++++ .../r/binlog_commit_wait_usec_basic.result | 13 +++++++++ .../r/slave_parallel_threads_basic.result | 13 +++++++++ .../t/binlog_commit_wait_count_basic.test | 14 ++++++++++ .../t/binlog_commit_wait_usec_basic.test | 14 ++++++++++ .../t/slave_parallel_threads_basic.test | 14 ++++++++++ sql/rpl_parallel.cc | 28 +++++++++++++++++-- sql/sql_binlog.cc | 17 +++-------- sql/sys_vars.cc | 11 ++++---- 12 files changed, 141 insertions(+), 26 deletions(-) create mode 100644 mysql-test/suite/sys_vars/r/binlog_commit_wait_count_basic.result create mode 100644 mysql-test/suite/sys_vars/r/binlog_commit_wait_usec_basic.result create mode 100644 mysql-test/suite/sys_vars/r/slave_parallel_threads_basic.result create mode 100644 mysql-test/suite/sys_vars/t/binlog_commit_wait_count_basic.test create mode 100644 mysql-test/suite/sys_vars/t/binlog_commit_wait_usec_basic.test create mode 100644 mysql-test/suite/sys_vars/t/slave_parallel_threads_basic.test diff --git a/mysql-test/r/mysqld--help.result b/mysql-test/r/mysqld--help.result index d9cdf3c3240..f6f03b42270 100644 --- a/mysql-test/r/mysqld--help.result +++ b/mysql-test/r/mysqld--help.result @@ -41,6 +41,17 @@ The following options may be given as the first argument: Type of BINLOG_CHECKSUM_ALG. Include checksum for log events in the binary log. Possible values are NONE and CRC32; default is NONE. + --binlog-commit-wait-count=# + If non-zero, binlog write will wait at most + binlog_commit_wait_usec microseconds for at least this + many commits to queue up for group commit to the binlog. + This can reduce I/O on the binlog and provide increased + opportunity for parallel apply on the slave, but too high + a value will decrease commit throughput. 
+ --binlog-commit-wait-usec=# + Maximum time, in microseconds, to wait for more commits + to queue up for binlog group commit. Only takes effect if + the value of binlog_commit_wait_count is non-zero. --binlog-direct-non-transactional-updates Causes updates to non-transactional engines using statement format to be written directly to binary log. @@ -783,6 +794,11 @@ The following options may be given as the first argument: --slave-net-timeout=# Number of seconds to wait for more data from any master/slave connection before aborting the read + --slave-parallel-threads=# + If non-zero, number of threads to spawn to apply in + parallel events on the slave that were group-committed on + the master or were logged with GTID in different + replication domains. --slave-skip-errors=name Tells the slave thread to continue replication when a query event returns an error from the provided list @@ -922,6 +938,8 @@ bind-address (No default value) binlog-annotate-row-events FALSE binlog-cache-size 32768 binlog-checksum NONE +binlog-commit-wait-count 0 +binlog-commit-wait-usec 100000 binlog-direct-non-transactional-updates FALSE binlog-format STATEMENT binlog-optimize-thread-scheduling TRUE @@ -1130,6 +1148,7 @@ slave-compressed-protocol FALSE slave-exec-mode STRICT slave-max-allowed-packet 1073741824 slave-net-timeout 3600 +slave-parallel-threads 0 slave-skip-errors (No default value) slave-sql-verify-checksum TRUE slave-transaction-retries 10 diff --git a/mysql-test/suite/perfschema/r/all_instances.result b/mysql-test/suite/perfschema/r/all_instances.result index 8ecb8dfe602..f338461f5cd 100644 --- a/mysql-test/suite/perfschema/r/all_instances.result +++ b/mysql-test/suite/perfschema/r/all_instances.result @@ -61,6 +61,7 @@ wait/synch/mutex/sql/LOCK_prepared_stmt_count wait/synch/mutex/sql/LOCK_prepare_ordered wait/synch/mutex/sql/LOCK_rpl_gtid_state wait/synch/mutex/sql/LOCK_rpl_status +wait/synch/mutex/sql/LOCK_rpl_thread_pool wait/synch/mutex/sql/LOCK_server_started 
wait/synch/mutex/sql/LOCK_slave_list wait/synch/mutex/sql/LOCK_slave_state @@ -122,8 +123,10 @@ wait/synch/cond/mysys/COND_alarm wait/synch/cond/mysys/my_thread_var::suspend wait/synch/cond/mysys/THR_COND_threads wait/synch/cond/sql/COND_flush_thread_cache +wait/synch/cond/sql/COND_prepare_ordered wait/synch/cond/sql/COND_queue_state wait/synch/cond/sql/COND_rpl_status +wait/synch/cond/sql/COND_rpl_thread_pool wait/synch/cond/sql/COND_server_started wait/synch/cond/sql/COND_thread_cache wait/synch/cond/sql/COND_thread_count diff --git a/mysql-test/suite/perfschema/r/dml_setup_instruments.result b/mysql-test/suite/perfschema/r/dml_setup_instruments.result index e1a5fdd11f6..38c02cc2bf4 100644 --- a/mysql-test/suite/perfschema/r/dml_setup_instruments.result +++ b/mysql-test/suite/perfschema/r/dml_setup_instruments.result @@ -38,14 +38,14 @@ order by name limit 10; NAME ENABLED TIMED wait/synch/cond/sql/COND_flush_thread_cache YES YES wait/synch/cond/sql/COND_manager YES YES +wait/synch/cond/sql/COND_parallel_entry YES YES +wait/synch/cond/sql/COND_prepare_ordered YES YES wait/synch/cond/sql/COND_queue_state YES YES wait/synch/cond/sql/COND_rpl_status YES YES +wait/synch/cond/sql/COND_rpl_thread YES YES +wait/synch/cond/sql/COND_rpl_thread_pool YES YES wait/synch/cond/sql/COND_server_started YES YES wait/synch/cond/sql/COND_thread_cache YES YES -wait/synch/cond/sql/COND_thread_count YES YES -wait/synch/cond/sql/Delayed_insert::cond YES YES -wait/synch/cond/sql/Delayed_insert::cond_client YES YES -wait/synch/cond/sql/Event_scheduler::COND_state YES YES select * from performance_schema.setup_instruments where name='Wait'; select * from performance_schema.setup_instruments diff --git a/mysql-test/suite/sys_vars/r/binlog_commit_wait_count_basic.result b/mysql-test/suite/sys_vars/r/binlog_commit_wait_count_basic.result new file mode 100644 index 00000000000..6837489311a --- /dev/null +++ b/mysql-test/suite/sys_vars/r/binlog_commit_wait_count_basic.result @@ -0,0 +1,13 @@ 
+SET @save_binlog_commit_wait_count= @@GLOBAL.binlog_commit_wait_count; +SELECT @@GLOBAL.binlog_commit_wait_count as 'must be zero because of default'; +must be zero because of default +0 +SELECT @@SESSION.binlog_commit_wait_count as 'no session var'; +ERROR HY000: Variable 'binlog_commit_wait_count' is a GLOBAL variable +SET GLOBAL binlog_commit_wait_count= 0; +SET GLOBAL binlog_commit_wait_count= DEFAULT; +SET GLOBAL binlog_commit_wait_count= 10; +SELECT @@GLOBAL.binlog_commit_wait_count; +@@GLOBAL.binlog_commit_wait_count +10 +SET GLOBAL binlog_commit_wait_count = @save_binlog_commit_wait_count; diff --git a/mysql-test/suite/sys_vars/r/binlog_commit_wait_usec_basic.result b/mysql-test/suite/sys_vars/r/binlog_commit_wait_usec_basic.result new file mode 100644 index 00000000000..b85af0bc9c7 --- /dev/null +++ b/mysql-test/suite/sys_vars/r/binlog_commit_wait_usec_basic.result @@ -0,0 +1,13 @@ +SET @save_binlog_commit_wait_usec= @@GLOBAL.binlog_commit_wait_usec; +SELECT @@GLOBAL.binlog_commit_wait_usec as 'check default'; +check default +100000 +SELECT @@SESSION.binlog_commit_wait_usec as 'no session var'; +ERROR HY000: Variable 'binlog_commit_wait_usec' is a GLOBAL variable +SET GLOBAL binlog_commit_wait_usec= 0; +SET GLOBAL binlog_commit_wait_usec= DEFAULT; +SET GLOBAL binlog_commit_wait_usec= 10000; +SELECT @@GLOBAL.binlog_commit_wait_usec; +@@GLOBAL.binlog_commit_wait_usec +10000 +SET GLOBAL binlog_commit_wait_usec = @save_binlog_commit_wait_usec; diff --git a/mysql-test/suite/sys_vars/r/slave_parallel_threads_basic.result b/mysql-test/suite/sys_vars/r/slave_parallel_threads_basic.result new file mode 100644 index 00000000000..2956d04c065 --- /dev/null +++ b/mysql-test/suite/sys_vars/r/slave_parallel_threads_basic.result @@ -0,0 +1,13 @@ +SET @save_slave_parallel_threads= @@GLOBAL.slave_parallel_threads; +SELECT @@GLOBAL.slave_parallel_threads as 'must be zero because of default'; +must be zero because of default +0 +SELECT @@SESSION.slave_parallel_threads as 'no 
session var'; +ERROR HY000: Variable 'slave_parallel_threads' is a GLOBAL variable +SET GLOBAL slave_parallel_threads= 0; +SET GLOBAL slave_parallel_threads= DEFAULT; +SET GLOBAL slave_parallel_threads= 10; +SELECT @@GLOBAL.slave_parallel_threads; +@@GLOBAL.slave_parallel_threads +10 +SET GLOBAL slave_parallel_threads = @save_slave_parallel_threads; diff --git a/mysql-test/suite/sys_vars/t/binlog_commit_wait_count_basic.test b/mysql-test/suite/sys_vars/t/binlog_commit_wait_count_basic.test new file mode 100644 index 00000000000..ebce0da77fe --- /dev/null +++ b/mysql-test/suite/sys_vars/t/binlog_commit_wait_count_basic.test @@ -0,0 +1,14 @@ +--source include/not_embedded.inc + +SET @save_binlog_commit_wait_count= @@GLOBAL.binlog_commit_wait_count; + +SELECT @@GLOBAL.binlog_commit_wait_count as 'must be zero because of default'; +--error ER_INCORRECT_GLOBAL_LOCAL_VAR +SELECT @@SESSION.binlog_commit_wait_count as 'no session var'; + +SET GLOBAL binlog_commit_wait_count= 0; +SET GLOBAL binlog_commit_wait_count= DEFAULT; +SET GLOBAL binlog_commit_wait_count= 10; +SELECT @@GLOBAL.binlog_commit_wait_count; + +SET GLOBAL binlog_commit_wait_count = @save_binlog_commit_wait_count; diff --git a/mysql-test/suite/sys_vars/t/binlog_commit_wait_usec_basic.test b/mysql-test/suite/sys_vars/t/binlog_commit_wait_usec_basic.test new file mode 100644 index 00000000000..ad9b6c99630 --- /dev/null +++ b/mysql-test/suite/sys_vars/t/binlog_commit_wait_usec_basic.test @@ -0,0 +1,14 @@ +--source include/not_embedded.inc + +SET @save_binlog_commit_wait_usec= @@GLOBAL.binlog_commit_wait_usec; + +SELECT @@GLOBAL.binlog_commit_wait_usec as 'check default'; +--error ER_INCORRECT_GLOBAL_LOCAL_VAR +SELECT @@SESSION.binlog_commit_wait_usec as 'no session var'; + +SET GLOBAL binlog_commit_wait_usec= 0; +SET GLOBAL binlog_commit_wait_usec= DEFAULT; +SET GLOBAL binlog_commit_wait_usec= 10000; +SELECT @@GLOBAL.binlog_commit_wait_usec; + +SET GLOBAL binlog_commit_wait_usec = @save_binlog_commit_wait_usec; 
diff --git a/mysql-test/suite/sys_vars/t/slave_parallel_threads_basic.test b/mysql-test/suite/sys_vars/t/slave_parallel_threads_basic.test new file mode 100644 index 00000000000..8e987489d86 --- /dev/null +++ b/mysql-test/suite/sys_vars/t/slave_parallel_threads_basic.test @@ -0,0 +1,14 @@ +--source include/not_embedded.inc + +SET @save_slave_parallel_threads= @@GLOBAL.slave_parallel_threads; + +SELECT @@GLOBAL.slave_parallel_threads as 'must be zero because of default'; +--error ER_INCORRECT_GLOBAL_LOCAL_VAR +SELECT @@SESSION.slave_parallel_threads as 'no session var'; + +SET GLOBAL slave_parallel_threads= 0; +SET GLOBAL slave_parallel_threads= DEFAULT; +SET GLOBAL slave_parallel_threads= 10; +SELECT @@GLOBAL.slave_parallel_threads; + +SET GLOBAL slave_parallel_threads = @save_slave_parallel_threads; diff --git a/sql/rpl_parallel.cc b/sql/rpl_parallel.cc index 63066d8d7c0..2bb5083a4f3 100644 --- a/sql/rpl_parallel.cc +++ b/sql/rpl_parallel.cc @@ -248,7 +248,7 @@ handle_rpl_parallel_thread(void *arg) if (!in_event_group) { rpt->current_entry= NULL; - if (!rpt->free) + if (!rpt->stop && !rpt->free) { mysql_mutex_lock(&rpt->pool->LOCK_rpl_thread_pool); list= rpt->pool->free_list; @@ -262,9 +262,27 @@ handle_rpl_parallel_thread(void *arg) } } - rpt->running= false; + rpt->thd= NULL; mysql_mutex_unlock(&rpt->LOCK_rpl_thread); + thd->clear_error(); + thd->catalog= 0; + thd->reset_query(); + thd->reset_db(NULL, 0); + thd_proc_info(thd, "Slave worker thread exiting"); + thd->temporary_tables= 0; + mysql_mutex_lock(&LOCK_thread_count); + THD_CHECK_SENTRY(thd); + delete thd; + mysql_mutex_unlock(&LOCK_thread_count); + + mysql_mutex_lock(&rpt->LOCK_rpl_thread); + rpt->running= false; + mysql_cond_signal(&rpt->COND_rpl_thread); + mysql_mutex_unlock(&rpt->LOCK_rpl_thread); + + my_thread_end(); + return NULL; } @@ -344,6 +362,7 @@ rpl_parallel_change_thread_count(rpl_parallel_thread_pool *pool, { rpl_parallel_thread *rpt= pool->get_thread(NULL); rpt->stop= true; + 
mysql_cond_signal(&rpt->COND_rpl_thread); mysql_mutex_unlock(&rpt->LOCK_rpl_thread); } @@ -354,7 +373,9 @@ rpl_parallel_change_thread_count(rpl_parallel_thread_pool *pool, while (rpt->running) mysql_cond_wait(&rpt->COND_rpl_thread, &rpt->LOCK_rpl_thread); mysql_mutex_unlock(&rpt->LOCK_rpl_thread); - delete rpt; + mysql_mutex_destroy(&rpt->LOCK_rpl_thread); + mysql_cond_destroy(&rpt->COND_rpl_thread); + my_free(rpt); } my_free(pool->threads); @@ -386,6 +407,7 @@ err: mysql_mutex_lock(&new_free_list->LOCK_rpl_thread); new_free_list->delay_start= false; new_free_list->stop= true; + mysql_cond_signal(&new_free_list->COND_rpl_thread); while (!new_free_list->running) mysql_cond_wait(&new_free_list->COND_rpl_thread, &new_free_list->LOCK_rpl_thread); diff --git a/sql/sql_binlog.cc b/sql/sql_binlog.cc index bef9a4c3475..df6aab88200 100644 --- a/sql/sql_binlog.cc +++ b/sql/sql_binlog.cc @@ -44,7 +44,6 @@ void mysql_client_binlog_statement(THD* thd) { - struct rpl_group_info *rgi; DBUG_ENTER("mysql_client_binlog_statement"); DBUG_PRINT("info",("binlog base64: '%*s'", (int) (thd->lex->comment.length < 2048 ? @@ -100,6 +99,7 @@ void mysql_client_binlog_statement(THD* thd) const char *error= 0; char *buf= (char *) my_malloc(decoded_len, MYF(MY_WME)); Log_event *ev = 0; + struct rpl_group_info rgi(rli); /* Out of memory check @@ -197,17 +197,8 @@ void mysql_client_binlog_statement(THD* thd) } } - if (!(rgi= rli->group_info)) - { - if (!(rgi= rli->group_info= (struct rpl_group_info *) - my_malloc(sizeof(*rgi), MYF(0)))) - { - my_error(ER_OUTOFMEMORY, MYF(0), sizeof(*rgi)); - goto end; - } - bzero(rgi, sizeof(*rgi)); - } - rgi->rli= rli; + rgi.rli= rli; + rgi.thd= thd; ev= Log_event::read_log_event(bufptr, event_len, &error, rli->relay_log.description_event_for_exec, 0); @@ -244,7 +235,7 @@ void mysql_client_binlog_statement(THD* thd) (ev->flags & LOG_EVENT_SKIP_REPLICATION_F ? 
OPTION_SKIP_REPLICATION : 0); - err= ev->apply_event(rgi); + err= ev->apply_event(&rgi); thd->variables.option_bits= (thd->variables.option_bits & ~OPTION_SKIP_REPLICATION) | diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc index 1273bff1750..91f13bebd12 100644 --- a/sql/sys_vars.cc +++ b/sql/sys_vars.cc @@ -1442,11 +1442,9 @@ check_slave_parallel_threads(sys_var *self, THD *thd, set_var *var) { bool running; - mysql_mutex_unlock(&LOCK_global_system_variables); mysql_mutex_lock(&LOCK_active_mi); running= master_info_index->give_error_if_slave_running(); mysql_mutex_unlock(&LOCK_active_mi); - mysql_mutex_lock(&LOCK_global_system_variables); if (running) return true; @@ -1457,17 +1455,18 @@ static bool fix_slave_parallel_threads(sys_var *self, THD *thd, enum_var_type type) { bool running; + bool err= false; mysql_mutex_unlock(&LOCK_global_system_variables); mysql_mutex_lock(&LOCK_active_mi); running= master_info_index->give_error_if_slave_running(); mysql_mutex_unlock(&LOCK_active_mi); - mysql_mutex_lock(&LOCK_global_system_variables); if (running || rpl_parallel_change_thread_count(&global_rpl_thread_pool, opt_slave_parallel_threads)) - return true; + err= true; + mysql_mutex_lock(&LOCK_global_system_variables); - return false; + return err; } @@ -1497,7 +1496,7 @@ static Sys_var_ulong Sys_binlog_commit_wait_count( static Sys_var_ulong Sys_binlog_commit_wait_usec( "binlog_commit_wait_usec", "Maximum time, in microseconds, to wait for more commits to queue up " - " for binlog group commit. Only takes effect if the value of " + "for binlog group commit. 
Only takes effect if the value of " "binlog_commit_wait_count is non-zero.", GLOBAL_VAR(opt_binlog_commit_wait_usec), CMD_LINE(REQUIRED_ARG), VALID_RANGE(0, ULONG_MAX), DEFAULT(100000), BLOCK_SIZE(1)); From ba4b937af2e3c9118071b1279bc39b6febca73a9 Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 12 Jul 2013 14:36:20 +0200 Subject: [PATCH 13/41] MDEV-4506: Parallel replication: Intermediate commit Move the deferred event stuff from Relay_log_info to rpl_group_info to make it thread safe for parallel replication. --- sql/log_event.cc | 20 ++++++------- sql/log_event.h | 10 ------- sql/rpl_parallel.cc | 4 ++- sql/rpl_rli.cc | 7 +++-- sql/rpl_rli.h | 71 +++++++++++++++++++++++---------------------- sql/rpl_utility.cc | 7 ++--- sql/slave.cc | 10 +++---- sql/sql_binlog.cc | 11 ++++--- sql/sql_class.cc | 17 +++++++---- sql/sql_class.h | 4 ++- sql/sql_insert.cc | 8 ++--- sql/sql_load.cc | 6 ++-- 12 files changed, 89 insertions(+), 86 deletions(-) diff --git a/sql/log_event.cc b/sql/log_event.cc index cb7bc3924f5..f07c58f4d6b 100644 --- a/sql/log_event.cc +++ b/sql/log_event.cc @@ -6716,8 +6716,8 @@ int Intvar_log_event::do_apply_event(struct rpl_group_info *rgi) */ rli->set_flag(Relay_log_info::IN_STMT); - if (rli->deferred_events_collecting) - return rli->deferred_events->add(this); + if (rgi->deferred_events_collecting) + return rgi->deferred_events->add(this); switch (type) { case LAST_INSERT_ID_EVENT: @@ -6827,8 +6827,8 @@ int Rand_log_event::do_apply_event(struct rpl_group_info *rgi) */ const_cast(rli)->set_flag(Relay_log_info::IN_STMT); - if (rli->deferred_events_collecting) - return rli->deferred_events->add(this); + if (rgi->deferred_events_collecting) + return rgi->deferred_events->add(this); thd->rand.seed1= (ulong) seed1; thd->rand.seed2= (ulong) seed2; @@ -6868,14 +6868,14 @@ Rand_log_event::do_shall_skip(Relay_log_info *rli) bool slave_execute_deferred_events(THD *thd) { bool res= false; - Relay_log_info *rli= thd->rli_slave; + rpl_group_info *rgi= 
thd->rgi_slave; - DBUG_ASSERT(rli && (!rli->deferred_events_collecting || rli->deferred_events)); + DBUG_ASSERT(rgi && (!rgi->deferred_events_collecting || rgi->deferred_events)); - if (!rli->deferred_events_collecting || rli->deferred_events->is_empty()) + if (!rgi->deferred_events_collecting || rgi->deferred_events->is_empty()) return res; - res= rli->deferred_events->execute(rli->group_info); + res= rgi->deferred_events->execute(rgi); return res; } @@ -7423,10 +7423,10 @@ int User_var_log_event::do_apply_event(struct rpl_group_info *rgi) Relay_log_info const *rli= rgi->rli; DBUG_ENTER("User_var_log_event::do_apply_event"); - if (rli->deferred_events_collecting) + if (rgi->deferred_events_collecting) { set_deferred(); - DBUG_RETURN(rli->deferred_events->add(this)); + DBUG_RETURN(rgi->deferred_events->add(this)); } if (!(charset= get_charset(charset_number, MYF(MY_WME)))) diff --git a/sql/log_event.h b/sql/log_event.h index 8bda493a7ec..6d6a330fc48 100644 --- a/sql/log_event.h +++ b/sql/log_event.h @@ -4698,16 +4698,6 @@ bool event_checksum_test(uchar *buf, ulong event_len, uint8 alg); uint8 get_checksum_alg(const char* buf, ulong len); extern TYPELIB binlog_checksum_typelib; -#ifndef MYSQL_CLIENT -/** - The function is called by slave applier in case there are - active table filtering rules to force gathering events associated - with Query-log-event into an array to execute - them once the fate of the Query is determined for execution. -*/ -bool slave_execute_deferred_events(THD *thd); -#endif - /** @} (end of group Replication) */ diff --git a/sql/rpl_parallel.cc b/sql/rpl_parallel.cc index 2bb5083a4f3..c3c557436cf 100644 --- a/sql/rpl_parallel.cc +++ b/sql/rpl_parallel.cc @@ -66,7 +66,7 @@ rpt_handle_event(rpl_parallel_thread::queued_event *qev, Relay_log_info *rli= rgi->rli; THD *thd= rgi->thd; - thd->rli_slave= rli; + thd->rgi_slave= rgi; thd->rpl_filter = rli->mi->rpl_filter; /* ToDo: Get rid of rli->group_info, it is not thread safe. 
*/ rli->group_info= rgi; @@ -574,6 +574,8 @@ rpl_parallel::do_event(struct rpl_group_info *serial_rgi, Log_event *ev) my_error(ER_OUT_OF_RESOURCES, MYF(MY_WME)); return true; } + if ((rgi->deferred_events_collecting= rli->mi->rpl_filter->is_on())) + rgi->deferred_events= new Deferred_log_events(rli); if ((gtid_ev->flags2 & Gtid_log_event::FL_GROUP_COMMIT_ID) && e->last_commit_id == gtid_ev->commit_id) diff --git a/sql/rpl_rli.cc b/sql/rpl_rli.cc index 8fb22266d5e..b96125d41cb 100644 --- a/sql/rpl_rli.cc +++ b/sql/rpl_rli.cc @@ -60,7 +60,7 @@ Relay_log_info::Relay_log_info(bool is_slave_recovery) inited(0), abort_slave(0), slave_running(0), until_condition(UNTIL_NONE), until_log_pos(0), retried_trans(0), executed_entries(0), group_info(0), tables_to_lock(0), tables_to_lock_count(0), - last_event_start_time(0), deferred_events(NULL),m_flags(0), + last_event_start_time(0), m_flags(0), row_stmt_start_timestamp(0), long_find_row_note_printed(false), m_annotate_event(0) { @@ -1535,7 +1535,8 @@ end: rpl_group_info::rpl_group_info(Relay_log_info *rli_) : rli(rli_), thd(0), gtid_sub_id(0), wait_commit_sub_id(0), - wait_commit_group_info(0), wait_start_sub_id(0), parallel_entry(0) + wait_commit_group_info(0), wait_start_sub_id(0), parallel_entry(0), + deferred_events(NULL) { bzero(¤t_gtid, sizeof(current_gtid)); } @@ -1596,7 +1597,7 @@ delete_or_keep_event_post_apply(Relay_log_info *rli, /* fall through */ default: DBUG_PRINT("info", ("Deleting the event after it has been executed")); - if (!rli->is_deferred_event(ev)) + if (!rli->group_info->is_deferred_event(ev)) delete ev; break; } diff --git a/sql/rpl_rli.h b/sql/rpl_rli.h index 294f2ba885a..07ce0600d94 100644 --- a/sql/rpl_rli.h +++ b/sql/rpl_rli.h @@ -403,41 +403,6 @@ public: The timestamp is set and reset in @c sql_slave_killed(). */ time_t last_event_start_time; - - /* - A container to hold on Intvar-, Rand-, Uservar- log-events in case - the slave is configured with table filtering rules. 
- The withhold events are executed when their parent Query destiny is - determined for execution as well. - */ - Deferred_log_events *deferred_events; - - /* - State of the container: true stands for IRU events gathering, - false does for execution, either deferred or direct. - */ - bool deferred_events_collecting; - - /* - Returns true if the argument event resides in the containter; - more specifically, the checking is done against the last added event. - */ - bool is_deferred_event(Log_event * ev) - { - return deferred_events_collecting ? deferred_events->is_last(ev) : false; - }; - /* The general cleanup that slave applier may need at the end of query. */ - inline void cleanup_after_query() - { - if (deferred_events) - deferred_events->rewind(); - }; - /* The general cleanup that slave applier may need at the end of session. */ - void cleanup_after_session() - { - if (deferred_events) - delete deferred_events; - }; /** Helper function to do after statement completion. @@ -581,6 +546,7 @@ public: private: + /* ToDo: This must be moved to rpl_group_info. */ uint32 m_flags; /* @@ -645,6 +611,41 @@ struct rpl_group_info rpl_group_info(Relay_log_info *rli_); ~rpl_group_info() { }; + + /* + A container to hold on Intvar-, Rand-, Uservar- log-events in case + the slave is configured with table filtering rules. + The withhold events are executed when their parent Query destiny is + determined for execution as well. + */ + Deferred_log_events *deferred_events; + + /* + State of the container: true stands for IRU events gathering, + false does for execution, either deferred or direct. + */ + bool deferred_events_collecting; + + /* + Returns true if the argument event resides in the containter; + more specifically, the checking is done against the last added event. + */ + bool is_deferred_event(Log_event * ev) + { + return deferred_events_collecting ? deferred_events->is_last(ev) : false; + }; + /* The general cleanup that slave applier may need at the end of query. 
*/ + inline void cleanup_after_query() + { + if (deferred_events) + deferred_events->rewind(); + }; + /* The general cleanup that slave applier may need at the end of session. */ + void cleanup_after_session() + { + if (deferred_events) + delete deferred_events; + }; }; diff --git a/sql/rpl_utility.cc b/sql/rpl_utility.cc index cce8ef99fef..f734b95edc1 100644 --- a/sql/rpl_utility.cc +++ b/sql/rpl_utility.cc @@ -1146,18 +1146,17 @@ bool Deferred_log_events::is_empty() bool Deferred_log_events::execute(struct rpl_group_info *rgi) { bool res= false; - Relay_log_info *rli= rgi->rli; - DBUG_ASSERT(rli->deferred_events_collecting); + DBUG_ASSERT(rgi->deferred_events_collecting); - rli->deferred_events_collecting= false; + rgi->deferred_events_collecting= false; for (uint i= 0; !res && i < array.elements; i++) { Log_event *ev= (* (Log_event **) dynamic_array_ptr(&array, i)); res= ev->apply_event(rgi); } - rli->deferred_events_collecting= true; + rgi->deferred_events_collecting= true; return res; } diff --git a/sql/slave.cc b/sql/slave.cc index b9ef3172364..a26010d75cc 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -4025,10 +4025,10 @@ pthread_handler_t handle_slave_sql(void *arg) goto err_during_init; } thd->init_for_queries(); - thd->rli_slave= rli; - if ((rli->deferred_events_collecting= mi->rpl_filter->is_on())) + thd->rgi_slave= &serial_rgi; + if ((serial_rgi.deferred_events_collecting= mi->rpl_filter->is_on())) { - rli->deferred_events= new Deferred_log_events(rli); + serial_rgi.deferred_events= new Deferred_log_events(rli); } thd->temporary_tables = rli->save_temporary_tables; // restore temp tables @@ -6302,10 +6302,10 @@ bool rpl_master_has_bug(const Relay_log_info *rli, uint bug_id, bool report, */ bool rpl_master_erroneous_autoinc(THD *thd) { - if (thd->rli_slave) + if (thd->rgi_slave) { DBUG_EXECUTE_IF("simulate_bug33029", return TRUE;); - return rpl_master_has_bug(thd->rli_slave, 33029, FALSE, NULL, NULL); + return rpl_master_has_bug(thd->rgi_slave->rli, 
33029, FALSE, NULL, NULL); } return FALSE; } diff --git a/sql/sql_binlog.cc b/sql/sql_binlog.cc index df6aab88200..1b6713f1bc3 100644 --- a/sql/sql_binlog.cc +++ b/sql/sql_binlog.cc @@ -80,6 +80,8 @@ void mysql_client_binlog_statement(THD* thd) my_bool have_fd_event= TRUE; int err; Relay_log_info *rli; + struct rpl_group_info *rgi; + rli= thd->rli_fake; if (!rli) { @@ -95,11 +97,12 @@ void mysql_client_binlog_statement(THD* thd) new Format_description_log_event(4); have_fd_event= FALSE; } + if (!(rgi= thd->rgi_fake)) + rgi= thd->rgi_fake= new rpl_group_info(rli); const char *error= 0; char *buf= (char *) my_malloc(decoded_len, MYF(MY_WME)); Log_event *ev = 0; - struct rpl_group_info rgi(rli); /* Out of memory check @@ -197,8 +200,8 @@ void mysql_client_binlog_statement(THD* thd) } } - rgi.rli= rli; - rgi.thd= thd; + rgi->rli= rli; + rgi->thd= thd; ev= Log_event::read_log_event(bufptr, event_len, &error, rli->relay_log.description_event_for_exec, 0); @@ -235,7 +238,7 @@ void mysql_client_binlog_statement(THD* thd) (ev->flags & LOG_EVENT_SKIP_REPLICATION_F ? 
OPTION_SKIP_REPLICATION : 0); - err= ev->apply_event(&rgi); + err= ev->apply_event(rgi); thd->variables.option_bits= (thd->variables.option_bits & ~OPTION_SKIP_REPLICATION) | diff --git a/sql/sql_class.cc b/sql/sql_class.cc index aec65dc385c..43d810d27d4 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -769,7 +769,7 @@ bool Drop_table_error_handler::handle_condition(THD *thd, THD::THD() :Statement(&main_lex, &main_mem_root, STMT_CONVENTIONAL_EXECUTION, /* statement id */ 0), - rli_fake(0), rli_slave(NULL), + rli_fake(0), rgi_fake(0), rgi_slave(NULL), in_sub_stmt(0), log_all_errors(0), binlog_unsafe_warning_flags(0), binlog_table_maps(0), @@ -1490,6 +1490,11 @@ THD::~THD() dbug_sentry= THD_SENTRY_GONE; #endif #ifndef EMBEDDED_LIBRARY + if (rgi_fake) + { + delete rgi_fake; + rgi_fake= NULL; + } if (rli_fake) { delete rli_fake; @@ -1497,8 +1502,8 @@ THD::~THD() } mysql_audit_free_thd(this); - if (rli_slave) - rli_slave->cleanup_after_session(); + if (rgi_slave) + rgi_slave->cleanup_after_session(); #endif free_root(&main_mem_root, MYF(0)); @@ -1883,7 +1888,7 @@ void THD::cleanup_after_query() which is intended to consume its event (there can be other SET statements between them). 
*/ - if ((rli_slave || rli_fake) && is_update_query(lex->sql_command)) + if ((rgi_slave || rli_fake) && is_update_query(lex->sql_command)) auto_inc_intervals_forced.empty(); #endif } @@ -1905,8 +1910,8 @@ void THD::cleanup_after_query() m_binlog_invoker= FALSE; #ifndef EMBEDDED_LIBRARY - if (rli_slave) - rli_slave->cleanup_after_query(); + if (rgi_slave) + rgi_slave->cleanup_after_query(); #endif DBUG_VOID_RETURN; diff --git a/sql/sql_class.h b/sql/sql_class.h index 3b7cfb42ec7..e7f593db62b 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -47,6 +47,7 @@ class Reprepare_observer; class Relay_log_info; +struct rpl_group_info; class Rpl_filter; class Query_log_event; @@ -1697,8 +1698,9 @@ public: /* Used to execute base64 coded binlog events in MySQL server */ Relay_log_info* rli_fake; + rpl_group_info* rgi_fake; /* Slave applier execution context */ - Relay_log_info* rli_slave; + rpl_group_info* rgi_slave; /* Used to SLAVE SQL thread */ Rpl_filter* rpl_filter; diff --git a/sql/sql_insert.cc b/sql/sql_insert.cc index 511296f3e4b..ac1837a778d 100644 --- a/sql/sql_insert.cc +++ b/sql/sql_insert.cc @@ -810,10 +810,10 @@ bool mysql_insert(THD *thd,TABLE_LIST *table_list, table->next_number_field=table->found_next_number_field; #ifdef HAVE_REPLICATION - if (thd->rli_slave && + if (thd->rgi_slave && (info.handle_duplicates == DUP_UPDATE) && (table->next_number_field != NULL) && - rpl_master_has_bug(thd->rli_slave, 24432, TRUE, NULL, NULL)) + rpl_master_has_bug(thd->rgi_slave->rli, 24432, TRUE, NULL, NULL)) goto abort; #endif @@ -3464,10 +3464,10 @@ select_insert::prepare(List &values, SELECT_LEX_UNIT *u) table->next_number_field=table->found_next_number_field; #ifdef HAVE_REPLICATION - if (thd->rli_slave && + if (thd->rgi_slave && (info.handle_duplicates == DUP_UPDATE) && (table->next_number_field != NULL) && - rpl_master_has_bug(thd->rli_slave, 24432, TRUE, NULL, NULL)) + rpl_master_has_bug(thd->rgi_slave->rli, 24432, TRUE, NULL, NULL)) DBUG_RETURN(1); #endif diff 
--git a/sql/sql_load.cc b/sql/sql_load.cc index 6a4712ca5b5..339820574c2 100644 --- a/sql/sql_load.cc +++ b/sql/sql_load.cc @@ -362,11 +362,11 @@ int mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list, MY_RETURN_REAL_PATH); } - if (thd->rli_slave) + if (thd->rgi_slave) { #if defined(HAVE_REPLICATION) && !defined(MYSQL_CLIENT) - if (strncmp(thd->rli_slave->slave_patternload_file, name, - thd->rli_slave->slave_patternload_file_size)) + if (strncmp(thd->rgi_slave->rli->slave_patternload_file, name, + thd->rgi_slave->rli->slave_patternload_file_size)) { /* LOAD DATA INFILE in the slave SQL Thread can only read from From 47f8e0ef6eb171119b092861ce1196bdedbd834c Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 12 Jul 2013 14:42:48 +0200 Subject: [PATCH 14/41] MDEV-4506: Parallel replication: Intermediate commit Remove Relay_log_info::group_info. (It is not thread safe). --- sql/rpl_parallel.cc | 6 ++---- sql/rpl_rli.cc | 10 +++++----- sql/rpl_rli.h | 4 +--- sql/slave.cc | 9 +-------- 4 files changed, 9 insertions(+), 20 deletions(-) diff --git a/sql/rpl_parallel.cc b/sql/rpl_parallel.cc index c3c557436cf..7970f15eb49 100644 --- a/sql/rpl_parallel.cc +++ b/sql/rpl_parallel.cc @@ -68,8 +68,6 @@ rpt_handle_event(rpl_parallel_thread::queued_event *qev, thd->rgi_slave= rgi; thd->rpl_filter = rli->mi->rpl_filter; - /* ToDo: Get rid of rli->group_info, it is not thread safe. */ - rli->group_info= rgi; /* ToDo: Access to thd, and what about rli, split out a parallel part? */ mysql_mutex_lock(&rli->data_lock); @@ -203,7 +201,7 @@ handle_rpl_parallel_thread(void *arg) !strcmp("ROLLBACK", ((Query_log_event *)events->ev)->query)))); /* ToDo: must use rgi here, not rli, for thread safety. 
*/ - delete_or_keep_event_post_apply(rgi->rli, event_type, events->ev); + delete_or_keep_event_post_apply(rgi, event_type, events->ev); my_free(events); if (end_of_group) @@ -661,7 +659,7 @@ rpl_parallel::do_event(struct rpl_group_info *serial_rgi, Log_event *ev) */ qev->rgi= serial_rgi; rpt_handle_event(qev, NULL); - delete_or_keep_event_post_apply(rli, typ, qev->ev); + delete_or_keep_event_post_apply(serial_rgi, typ, qev->ev); return false; } diff --git a/sql/rpl_rli.cc b/sql/rpl_rli.cc index b96125d41cb..2d38fe2a6f6 100644 --- a/sql/rpl_rli.cc +++ b/sql/rpl_rli.cc @@ -59,7 +59,7 @@ Relay_log_info::Relay_log_info(bool is_slave_recovery) abort_pos_wait(0), slave_run_id(0), sql_thd(0), inited(0), abort_slave(0), slave_running(0), until_condition(UNTIL_NONE), until_log_pos(0), retried_trans(0), executed_entries(0), - group_info(0), tables_to_lock(0), tables_to_lock_count(0), + tables_to_lock(0), tables_to_lock_count(0), last_event_start_time(0), m_flags(0), row_stmt_start_timestamp(0), long_find_row_note_printed(false), m_annotate_event(0) @@ -1559,7 +1559,7 @@ event_group_new_gtid(rpl_group_info *rgi, Gtid_log_event *gev) void -delete_or_keep_event_post_apply(Relay_log_info *rli, +delete_or_keep_event_post_apply(rpl_group_info *rgi, Log_event_type typ, Log_event *ev) { /* @@ -1583,7 +1583,7 @@ delete_or_keep_event_post_apply(Relay_log_info *rli, The thd->query will be used to generate new Annotate_rows event during applying the subsequent Rows events. */ - rli->set_annotate_event((Annotate_rows_log_event*) ev); + rgi->rli->set_annotate_event((Annotate_rows_log_event*) ev); break; case DELETE_ROWS_EVENT: case UPDATE_ROWS_EVENT: @@ -1593,11 +1593,11 @@ delete_or_keep_event_post_apply(Relay_log_info *rli, event (if any) is not needed anymore and can be deleted. 
*/ if (((Rows_log_event*)ev)->get_flags(Rows_log_event::STMT_END_F)) - rli->free_annotate_event(); + rgi->rli->free_annotate_event(); /* fall through */ default: DBUG_PRINT("info", ("Deleting the event after it has been executed")); - if (!rli->group_info->is_deferred_event(ev)) + if (!rgi->is_deferred_event(ev)) delete ev; break; } diff --git a/sql/rpl_rli.h b/sql/rpl_rli.h index 07ce0600d94..a78741df3e0 100644 --- a/sql/rpl_rli.h +++ b/sql/rpl_rli.h @@ -314,8 +314,6 @@ public: char slave_patternload_file[FN_REFLEN]; size_t slave_patternload_file_size; - /* ToDo: We need to remove this, always use the per-transaction one to work with parallel replication. */ - struct rpl_group_info *group_info; rpl_parallel parallel; Relay_log_info(bool is_slave_recovery); @@ -657,7 +655,7 @@ extern struct rpl_slave_state rpl_global_gtid_slave_state; int rpl_load_gtid_slave_state(THD *thd); int event_group_new_gtid(rpl_group_info *rgi, Gtid_log_event *gev); -void delete_or_keep_event_post_apply(Relay_log_info *rli, +void delete_or_keep_event_post_apply(rpl_group_info *rgi, Log_event_type typ, Log_event *ev); #endif /* RPL_RLI_H */ diff --git a/sql/slave.cc b/sql/slave.cc index a26010d75cc..777ab9c8468 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -3264,7 +3264,7 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli, exec_res= apply_event_and_update_pos(ev, thd, serial_rgi, NULL); - delete_or_keep_event_post_apply(rli, typ, ev); + delete_or_keep_event_post_apply(serial_rgi, typ, ev); /* update_log_pos failed: this should not happen, so we don't @@ -4189,13 +4189,6 @@ log '%s' at position %s, relay log '%s' position: %s%s", RPL_LOG_NAME, } mysql_mutex_unlock(&rli->data_lock); - /* - ToDo: Get rid of this, all accesses to rpl_group_info must be made - per-worker-thread to work with parallel replication. 
- */ - if (opt_slave_parallel_threads <= 0) - rli->group_info= &serial_rgi; - /* Read queries from the IO/THREAD until this thread is killed */ while (!sql_slave_killed(thd,rli)) From 13fddb32dec20bfea058281dcf0e780ca02d5402 Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 12 Jul 2013 14:52:05 +0200 Subject: [PATCH 15/41] MDEV-4506: Parallel replication: Intermediate commit. Move annotate-event stuff from Relay_log_info to rpl_group_info, to make it thread safe. --- sql/rpl_rli.cc | 16 ++++++---- sql/rpl_rli.h | 85 +++++++++++++++++++++++++------------------------- 2 files changed, 53 insertions(+), 48 deletions(-) diff --git a/sql/rpl_rli.cc b/sql/rpl_rli.cc index 2d38fe2a6f6..73658d10624 100644 --- a/sql/rpl_rli.cc +++ b/sql/rpl_rli.cc @@ -61,8 +61,7 @@ Relay_log_info::Relay_log_info(bool is_slave_recovery) until_log_pos(0), retried_trans(0), executed_entries(0), tables_to_lock(0), tables_to_lock_count(0), last_event_start_time(0), m_flags(0), - row_stmt_start_timestamp(0), long_find_row_note_printed(false), - m_annotate_event(0) + row_stmt_start_timestamp(0), long_find_row_note_printed(false) { DBUG_ENTER("Relay_log_info::Relay_log_info"); @@ -112,7 +111,6 @@ Relay_log_info::~Relay_log_info() mysql_cond_destroy(&log_space_cond); mysql_cond_destroy(&sleep_cond); relay_log.cleanup(); - free_annotate_event(); DBUG_VOID_RETURN; } @@ -1536,12 +1534,18 @@ end: rpl_group_info::rpl_group_info(Relay_log_info *rli_) : rli(rli_), thd(0), gtid_sub_id(0), wait_commit_sub_id(0), wait_commit_group_info(0), wait_start_sub_id(0), parallel_entry(0), - deferred_events(NULL) + deferred_events(NULL), m_annotate_event(0) { bzero(¤t_gtid, sizeof(current_gtid)); } +rpl_group_info::~rpl_group_info() +{ + free_annotate_event(); +} + + int event_group_new_gtid(rpl_group_info *rgi, Gtid_log_event *gev) { @@ -1583,7 +1587,7 @@ delete_or_keep_event_post_apply(rpl_group_info *rgi, The thd->query will be used to generate new Annotate_rows event during applying the subsequent Rows events. 
*/ - rgi->rli->set_annotate_event((Annotate_rows_log_event*) ev); + rgi->set_annotate_event((Annotate_rows_log_event*) ev); break; case DELETE_ROWS_EVENT: case UPDATE_ROWS_EVENT: @@ -1593,7 +1597,7 @@ delete_or_keep_event_post_apply(rpl_group_info *rgi, event (if any) is not needed anymore and can be deleted. */ if (((Rows_log_event*)ev)->get_flags(Rows_log_event::STMT_END_F)) - rgi->rli->free_annotate_event(); + rgi->free_annotate_event(); /* fall through */ default: DBUG_PRINT("info", ("Deleting the event after it has been executed")); diff --git a/sql/rpl_rli.h b/sql/rpl_rli.h index a78741df3e0..91c5c65d33b 100644 --- a/sql/rpl_rli.h +++ b/sql/rpl_rli.h @@ -472,43 +472,6 @@ public: (m_flags & (1UL << IN_STMT)); } - /** - Save pointer to Annotate_rows event and switch on the - binlog_annotate_row_events for this sql thread. - To be called when sql thread recieves an Annotate_rows event. - */ - inline void set_annotate_event(Annotate_rows_log_event *event) - { - free_annotate_event(); - m_annotate_event= event; - sql_thd->variables.binlog_annotate_row_events= 1; - } - - /** - Returns pointer to the saved Annotate_rows event or NULL if there is - no saved event. - */ - inline Annotate_rows_log_event* get_annotate_event() - { - return m_annotate_event; - } - - /** - Delete saved Annotate_rows event (if any) and switch off the - binlog_annotate_row_events for this sql thread. - To be called when sql thread has applied the last (i.e. with - STMT_END_F flag) rbr event. 
- */ - inline void free_annotate_event() - { - if (m_annotate_event) - { - sql_thd->variables.binlog_annotate_row_events= 0; - delete m_annotate_event; - m_annotate_event= 0; - } - } - time_t get_row_stmt_start_timestamp() { return row_stmt_start_timestamp; @@ -553,8 +516,6 @@ private: */ time_t row_stmt_start_timestamp; bool long_find_row_note_printed; - - Annotate_rows_log_event *m_annotate_event; }; @@ -607,9 +568,6 @@ struct rpl_group_info struct rpl_parallel_entry *parallel_entry; - rpl_group_info(Relay_log_info *rli_); - ~rpl_group_info() { }; - /* A container to hold on Intvar-, Rand-, Uservar- log-events in case the slave is configured with table filtering rules. @@ -624,6 +582,11 @@ struct rpl_group_info */ bool deferred_events_collecting; + Annotate_rows_log_event *m_annotate_event; + + rpl_group_info(Relay_log_info *rli_); + ~rpl_group_info(); + /* Returns true if the argument event resides in the containter; more specifically, the checking is done against the last added event. @@ -644,6 +607,44 @@ struct rpl_group_info if (deferred_events) delete deferred_events; }; + + /** + Save pointer to Annotate_rows event and switch on the + binlog_annotate_row_events for this sql thread. + To be called when sql thread recieves an Annotate_rows event. + */ + inline void set_annotate_event(Annotate_rows_log_event *event) + { + free_annotate_event(); + m_annotate_event= event; + this->thd->variables.binlog_annotate_row_events= 1; + } + + /** + Returns pointer to the saved Annotate_rows event or NULL if there is + no saved event. + */ + inline Annotate_rows_log_event* get_annotate_event() + { + return m_annotate_event; + } + + /** + Delete saved Annotate_rows event (if any) and switch off the + binlog_annotate_row_events for this sql thread. + To be called when sql thread has applied the last (i.e. with + STMT_END_F flag) rbr event. 
+ */ + inline void free_annotate_event() + { + if (m_annotate_event) + { + this->thd->variables.binlog_annotate_row_events= 0; + delete m_annotate_event; + m_annotate_event= 0; + } + } + }; From d107bdaa01ad34b1afb4542b981b9b19af499f7b Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 13 Sep 2013 15:09:57 +0200 Subject: [PATCH 16/41] MDEV-4506, parallel replication. Some after-review fixes. --- sql/log.cc | 276 +++++++++++++++++++++++++++++-------------- sql/log.h | 2 +- sql/log_event.cc | 72 ++++++----- sql/log_event.h | 74 ++++++------ sql/log_event_old.cc | 6 +- sql/log_event_old.h | 12 +- sql/rpl_gtid.cc | 2 +- sql/rpl_parallel.cc | 15 ++- sql/rpl_parallel.h | 6 +- sql/rpl_rli.cc | 8 +- sql/rpl_rli.h | 14 ++- sql/rpl_utility.cc | 2 +- sql/slave.cc | 2 +- sql/sql_binlog.cc | 2 +- sql/sql_class.cc | 7 +- sql/sql_class.h | 2 +- 16 files changed, 311 insertions(+), 191 deletions(-) diff --git a/sql/log.cc b/sql/log.cc index 61d4428fc18..763eb4177ea 100644 --- a/sql/log.cc +++ b/sql/log.cc @@ -6542,26 +6542,87 @@ MYSQL_BIN_LOG::write_transaction_to_binlog(THD *thd, } } + +/* + Put a transaction that is ready to commit in the group commit queue. + The transaction is identified by the ENTRY object passed into this function. + + To facilitate group commit for the binlog, we first queue up ourselves in + this function. Then later the first thread to enter the queue waits for + the LOCK_log mutex, and commits for everyone in the queue once it gets the + lock. Any other threads in the queue just wait for the first one to finish + the commit and wake them up. This way, all transactions in the queue get + committed in a single disk operation. + + The return value of this function is TRUE if queued as the first entry in + the queue (meaning this is the leader), FALSE otherwise. + + The main work in this function is when the commit in one transaction has + been marked to wait for the commit of another transaction to happen + first. 
This is used to support in-order parallel replication, where + transactions can execute out-of-order but need to be committed in-order with + how they happened on the master. The waiting of one commit on another needs + to be integrated with the group commit queue, to ensure that the waiting + transaction can participate in the same group commit as the waited-for + transaction. + + So when we put a transaction in the queue, we check if there were other + transactions already prepared to commit but just waiting for the first one + to commit. If so, we add those to the queue as well, transitively for all + waiters. +*/ + bool -MYSQL_BIN_LOG::queue_for_group_commit(group_commit_entry *entry, - wait_for_commit *wfc) +MYSQL_BIN_LOG::queue_for_group_commit(group_commit_entry *entry) { group_commit_entry *orig_queue; wait_for_commit *list, *cur, *last; + wait_for_commit *wfc; /* - To facilitate group commit for the binlog, we first queue up ourselves in - the group commit queue. Then the first thread to enter the queue waits for - the LOCK_log mutex, and commits for everyone in the queue once it gets the - lock. Any other threads in the queue just wait for the first one to finish - the commit and wake them up. + Check if we need to wait for another transaction to commit before us. - To support in-order parallel replication with group commit, after we add - some transaction to the queue, we check if there were other transactions - already prepared to commit but just waiting for the first one to commit. - If so, we add those to the queue as well, transitively for all waiters. + It is safe to do a quick check without lock first in the case where we do + not have to wait. But if the quick check shows we need to wait, we must do + another safe check under lock, to avoid the race where the other + transaction wakes us up between the check and the wait. 
*/ + wfc= entry->thd->wait_for_commit_ptr; + entry->queued_by_other= false; + if (wfc && wfc->waiting_for_commit) + { + mysql_mutex_lock(&wfc->LOCK_wait_commit); + /* Do an extra check here, this time safely under lock. */ + if (wfc->waiting_for_commit) + { + /* + By setting wfc->opaque_pointer to our own entry, we mark that we are + ready to commit, but waiting for another transaction to commit before + us. + This other transaction may then take over the commit process for us to + get us included in its own group commit. If this happens, the + queued_by_other flag is set. + */ + wfc->opaque_pointer= entry; + do + { + mysql_cond_wait(&wfc->COND_wait_commit, &wfc->LOCK_wait_commit); + } while (wfc->waiting_for_commit); + wfc->opaque_pointer= NULL; + } + mysql_mutex_unlock(&wfc->LOCK_wait_commit); + } + + /* + If the transaction we were waiting for has already put us into the group + commit queue (and possibly already done the entire binlog commit for us), + then there is nothing else to do. + */ + if (entry->queued_by_other) + return false; + + /* Now enqueue ourselves in the group commit queue. */ entry->thd->clear_wakeup_ready(); mysql_mutex_lock(&LOCK_prepare_ordered); orig_queue= group_commit_queue; @@ -6574,6 +6635,23 @@ MYSQL_BIN_LOG::queue_for_group_commit(group_commit_entry *entry, This would be natural to do with recursion, but we want to avoid potentially unbounded recursion blowing the C stack, so we use the list approach instead. + + We keep a list of all the waiters that need to be processed in `list', + linked through the next_subsequent_commit pointer. Initially this list + contains only the entry passed into this function. + + We process entries in the list one by one. The element currently being + processed is pointed to by `cur`, and the element at the end of the list + is pointed to by `last` (we do not use NULL to terminate the list). + + As we process an element, it is first added to the group_commit_queue. 
+ Then any waiters for that element are added at the end of the list, to + be processed in subsequent iterations. This continues until the list + is exhausted, with all elements ever added eventually processed. + + The end result is a breadth-first traversal of the tree of waiters, + re-using the next_subsequent_commit pointers in place of extra stack + space in a recursive traversal. */ list= wfc; cur= list; @@ -6594,6 +6672,12 @@ MYSQL_BIN_LOG::queue_for_group_commit(group_commit_entry *entry, if (!cur) break; // Can happen if initial entry has no wait_for_commit + /* + Check if this transaction has other transactions waiting for it to commit. + + If so, process the waiting transactions, and their waiters and so on, + transitively. + */ if (cur->subsequent_commits_list) { bool have_lock; @@ -6601,63 +6685,66 @@ MYSQL_BIN_LOG::queue_for_group_commit(group_commit_entry *entry, mysql_mutex_lock(&cur->LOCK_wait_commit); have_lock= true; + /* + Grab the list, now safely under lock, and process it if still + non-empty. + */ waiter= cur->subsequent_commits_list; - /* Check again, now safely under lock. */ - if (waiter) + cur->subsequent_commits_list= NULL; + while (waiter) { - /* Grab the list of waiters and process it. */ - cur->subsequent_commits_list= NULL; - do + wait_for_commit *next= waiter->next_subsequent_commit; + group_commit_entry *entry2= + (group_commit_entry *)waiter->opaque_pointer; + if (entry2) { - wait_for_commit *next= waiter->next_subsequent_commit; - group_commit_entry *entry2= - (group_commit_entry *)waiter->opaque_pointer; - if (entry2) - { - /* - This is another transaction ready to be written to the binary - log. We can put it into the queue directly, without needing a - separate context switch to the other thread. We just set a flag - so that the other thread will know when it wakes up that it was - already processed. + /* + This is another transaction ready to be written to the binary + log.
We can put it into the queue directly, without needing a + separate context switch to the other thread. We just set a flag + so that the other thread will know when it wakes up that it was + already processed. - So put it at the end of the list to be processed in a subsequent - iteration of the outer loop. - */ - entry2->queued_by_other= true; - last->next_subsequent_commit= waiter; - last= waiter; - /* - As a small optimisation, we do not actually need to set - waiter->next_subsequent_commit to NULL, as we can use the - pointer `last' to check for end-of-list. - */ - } - else - { - /* - Wake up the waiting transaction. + So put it at the end of the list to be processed in a subsequent + iteration of the outer loop. + */ + entry2->queued_by_other= true; + last->next_subsequent_commit= waiter; + last= waiter; + /* + As a small optimisation, we do not actually need to set + waiter->next_subsequent_commit to NULL, as we can use the + pointer `last' to check for end-of-list. + */ + } + else + { + /* + Wake up the waiting transaction. - For this, we need to set the "wakeup running" flag and release - the waitee lock to avoid a deadlock, see comments on - THD::wakeup_subsequent_commits2() for details. - */ - if (have_lock) - { - cur->wakeup_subsequent_commits_running= true; - mysql_mutex_unlock(&cur->LOCK_wait_commit); - have_lock= false; - } - waiter->wakeup(); + For this, we need to set the "wakeup running" flag and release + the waitee lock to avoid a deadlock, see comments on + THD::wakeup_subsequent_commits2() for details. + */ + if (have_lock) + { + have_lock= false; + cur->wakeup_subsequent_commits_running= true; + mysql_mutex_unlock(&cur->LOCK_wait_commit); } - waiter= next; - } while (waiter); + waiter->wakeup(); + } + waiter= next; } if (have_lock) mysql_mutex_unlock(&cur->LOCK_wait_commit); } if (cur == last) break; + /* + Move to the next entry in the flattened list of waiting transactions + that still need to be processed transitively. 
+ */ cur= cur->next_subsequent_commit; entry= (group_commit_entry *)cur->opaque_pointer; DBUG_ASSERT(entry != NULL); @@ -6691,31 +6778,7 @@ MYSQL_BIN_LOG::queue_for_group_commit(group_commit_entry *entry, bool MYSQL_BIN_LOG::write_transaction_to_binlog_events(group_commit_entry *entry) { - wait_for_commit *wfc; - bool is_leader; - - wfc= entry->thd->wait_for_commit_ptr; - entry->queued_by_other= false; - if (wfc && wfc->waiting_for_commit) - { - mysql_mutex_lock(&wfc->LOCK_wait_commit); - /* Do an extra check here, this time safely under lock. */ - if (wfc->waiting_for_commit) - { - wfc->opaque_pointer= entry; - do - { - mysql_cond_wait(&wfc->COND_wait_commit, &wfc->LOCK_wait_commit); - } while (wfc->waiting_for_commit); - wfc->opaque_pointer= NULL; - } - mysql_mutex_unlock(&wfc->LOCK_wait_commit); - } - - if (entry->queued_by_other) - is_leader= false; - else - is_leader= queue_for_group_commit(entry, wfc); + bool is_leader= queue_for_group_commit(entry); /* The first in the queue handles group commit for all; the others just wait @@ -6756,6 +6819,16 @@ MYSQL_BIN_LOG::write_transaction_to_binlog_events(group_commit_entry *entry) if (next) { + /* + Wake up the next thread in the group commit. + + The next thread can be waiting in two different ways, depending on + whether it put itself in the queue, or if it was put in queue by us + because it had to wait for us to commit first. + + So execute the appropriate wakeup, identified by the queued_by_other + field. 
+ */ if (next->queued_by_other) next->thd->wait_for_commit_ptr->wakeup(); else @@ -6840,14 +6913,18 @@ MYSQL_BIN_LOG::trx_group_commit_leader(group_commit_entry *leader) */ mysql_mutex_lock(&LOCK_log); DEBUG_SYNC(leader->thd, "commit_after_get_LOCK_log"); - binlog_id= current_binlog_id; mysql_mutex_lock(&LOCK_prepare_ordered); if (opt_binlog_commit_wait_count) wait_for_sufficient_commits(); + /* + Note that wait_for_sufficient_commits() may have released and + re-acquired the LOCK_log and LOCK_prepare_ordered if it needed to wait. + */ current= group_commit_queue; group_commit_queue= NULL; mysql_mutex_unlock(&LOCK_prepare_ordered); + binlog_id= current_binlog_id; /* As the queue is in reverse order of entering, reverse it. */ last_in_queue= current; @@ -7141,6 +7218,13 @@ MYSQL_BIN_LOG::write_transaction_or_stmt(group_commit_entry *entry, } +/* + Wait for sufficient commits to queue up for group commit, according to the + values of binlog_commit_wait_count and binlog_commit_wait_usec. + + Note that this function may release and re-acquire LOCK_log and + LOCK_prepare_ordered if it needs to wait. +*/ void MYSQL_BIN_LOG::wait_for_sufficient_commits() { @@ -7152,11 +7236,9 @@ MYSQL_BIN_LOG::wait_for_sufficient_commits() mysql_mutex_assert_owner(&LOCK_log); mysql_mutex_assert_owner(&LOCK_prepare_ordered); - count= 0; - for (e= last_head= group_commit_queue; e; e= e->next) - ++count; - if (count >= opt_binlog_commit_wait_count) - return; + for (e= last_head= group_commit_queue, count= 0; e; e= e->next) + if (++count >= opt_binlog_commit_wait_count) + return; mysql_mutex_unlock(&LOCK_log); set_timespec_nsec(wait_until, (ulonglong)1000*opt_binlog_commit_wait_usec); @@ -7178,7 +7260,25 @@ MYSQL_BIN_LOG::wait_for_sufficient_commits() last_head= head; } - mysql_mutex_lock(&LOCK_log); + /* + We must not wait for LOCK_log while holding LOCK_prepare_ordered. + LOCK_log can be held for long periods (eg. 
we do I/O under it), while + LOCK_prepare_ordered must only be held for short periods. + + In addition, waiting for LOCK_log while holding LOCK_prepare_ordered would + violate locking order of LOCK_log-before-LOCK_prepare_ordered. This could + cause SAFEMUTEX warnings (even if it cannot actually deadlock with current + code, as there can be at most one group commit leader thread at a time). + + So release and re-acquire LOCK_prepare_ordered if we need to wait for the + LOCK_log. + */ + if (mysql_mutex_trylock(&LOCK_log)) + { + mysql_mutex_unlock(&LOCK_prepare_ordered); + mysql_mutex_lock(&LOCK_log); + mysql_mutex_lock(&LOCK_prepare_ordered); + } } diff --git a/sql/log.h b/sql/log.h index efb560dc245..8b5fe17e660 100644 --- a/sql/log.h +++ b/sql/log.h @@ -540,7 +540,7 @@ class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG void do_checkpoint_request(ulong binlog_id); void purge(); int write_transaction_or_stmt(group_commit_entry *entry, uint64 commit_id); - bool queue_for_group_commit(group_commit_entry *entry, wait_for_commit *wfc); + bool queue_for_group_commit(group_commit_entry *entry); bool write_transaction_to_binlog_events(group_commit_entry *entry); void trx_group_commit_leader(group_commit_entry *leader); bool is_xidlist_idle_nolock(); diff --git a/sql/log_event.cc b/sql/log_event.cc index f07c58f4d6b..c0a2ebfa365 100644 --- a/sql/log_event.cc +++ b/sql/log_event.cc @@ -937,7 +937,7 @@ Log_event::Log_event(const char* buf, #ifndef MYSQL_CLIENT #ifdef HAVE_REPLICATION -int Log_event::do_update_pos(struct rpl_group_info *rgi) +int Log_event::do_update_pos(rpl_group_info *rgi) { Relay_log_info *rli= rgi->rli; /* @@ -3756,7 +3756,7 @@ void Query_log_event::print(FILE* file, PRINT_EVENT_INFO* print_event_info) #if defined(HAVE_REPLICATION) && !defined(MYSQL_CLIENT) -int Query_log_event::do_apply_event(struct rpl_group_info *rgi) +int Query_log_event::do_apply_event(rpl_group_info *rgi) { return do_apply_event(rgi, query, q_len); } @@ -3807,8 +3807,8 @@ bool 
test_if_equal_repl_errors(int expected_error, int actual_error) mismatch. This mismatch could be implemented with a new ER_ code, and to ignore it you would use --slave-skip-errors... */ -int Query_log_event::do_apply_event(struct rpl_group_info *rgi, - const char *query_arg, uint32 q_len_arg) +int Query_log_event::do_apply_event(rpl_group_info *rgi, + const char *query_arg, uint32 q_len_arg) { LEX_STRING new_db; int expected_error,actual_error= 0; @@ -4244,7 +4244,7 @@ end: DBUG_RETURN(thd->is_slave_error); } -int Query_log_event::do_update_pos(struct rpl_group_info *rgi) +int Query_log_event::do_update_pos(rpl_group_info *rgi) { Relay_log_info *rli= rgi->rli; /* @@ -4461,7 +4461,7 @@ bool Start_log_event_v3::write(IO_CACHE* file) other words, no deadlock problem. */ -int Start_log_event_v3::do_apply_event(struct rpl_group_info *rgi) +int Start_log_event_v3::do_apply_event(rpl_group_info *rgi) { DBUG_ENTER("Start_log_event_v3::do_apply_event"); int error= 0; @@ -4810,7 +4810,7 @@ bool Format_description_log_event::write(IO_CACHE* file) #endif #if defined(HAVE_REPLICATION) && !defined(MYSQL_CLIENT) -int Format_description_log_event::do_apply_event(struct rpl_group_info *rgi) +int Format_description_log_event::do_apply_event(rpl_group_info *rgi) { int ret= 0; Relay_log_info const *rli= rgi->rli; @@ -4867,7 +4867,7 @@ int Format_description_log_event::do_apply_event(struct rpl_group_info *rgi) DBUG_RETURN(ret); } -int Format_description_log_event::do_update_pos(struct rpl_group_info *rgi) +int Format_description_log_event::do_update_pos(rpl_group_info *rgi) { Relay_log_info *rli= rgi->rli; if (server_id == (uint32) global_system_variables.server_id) @@ -5516,7 +5516,7 @@ void Load_log_event::set_fields(const char* affected_db, 1 Failure */ -int Load_log_event::do_apply_event(NET* net, struct rpl_group_info *rgi, +int Load_log_event::do_apply_event(NET* net, rpl_group_info *rgi, bool use_rli_only_for_errors) { LEX_STRING new_db; @@ -5919,7 +5919,7 @@ bool 
Rotate_log_event::write(IO_CACHE* file) @retval 0 ok */ -int Rotate_log_event::do_update_pos(struct rpl_group_info *rgi) +int Rotate_log_event::do_update_pos(rpl_group_info *rgi) { Relay_log_info *rli= rgi->rli; DBUG_ENTER("Rotate_log_event::do_update_pos"); @@ -6096,7 +6096,7 @@ bool Binlog_checkpoint_log_event::write(IO_CACHE *file) Gtid_log_event::Gtid_log_event(const char *buf, uint event_len, const Format_description_log_event *description_event) - : Log_event(buf, description_event), seq_no(0) + : Log_event(buf, description_event), seq_no(0), commit_id(0) { uint8 header_size= description_event->common_header_len; uint8 post_header_len= description_event->post_header_len[GTID_EVENT-1]; @@ -6120,8 +6120,6 @@ Gtid_log_event::Gtid_log_event(const char *buf, uint event_len, ++buf; commit_id= uint8korr(buf); } - else - commit_id= 0; } @@ -6254,7 +6252,7 @@ Gtid_log_event::pack_info(THD *thd, Protocol *protocol) static char gtid_begin_string[] = "BEGIN"; int -Gtid_log_event::do_apply_event(struct rpl_group_info *rgi) +Gtid_log_event::do_apply_event(rpl_group_info *rgi) { thd->variables.server_id= this->server_id; thd->variables.gtid_domain_id= this->domain_id; @@ -6295,7 +6293,7 @@ Gtid_log_event::do_apply_event(struct rpl_group_info *rgi) int -Gtid_log_event::do_update_pos(struct rpl_group_info *rgi) +Gtid_log_event::do_update_pos(rpl_group_info *rgi) { Relay_log_info *rli= rgi->rli; rli->inc_event_relay_log_pos(); @@ -6477,7 +6475,7 @@ Gtid_list_log_event::write(IO_CACHE *file) int -Gtid_list_log_event::do_apply_event(struct rpl_group_info *rgi) +Gtid_list_log_event::do_apply_event(rpl_group_info *rgi) { Relay_log_info const *rli= rgi->rli; int ret= Log_event::do_apply_event(rgi); @@ -6707,7 +6705,7 @@ void Intvar_log_event::print(FILE* file, PRINT_EVENT_INFO* print_event_info) Intvar_log_event::do_apply_event() */ -int Intvar_log_event::do_apply_event(struct rpl_group_info *rgi) +int Intvar_log_event::do_apply_event(rpl_group_info *rgi) { Relay_log_info *rli= 
rgi->rli; /* @@ -6731,7 +6729,7 @@ int Intvar_log_event::do_apply_event(struct rpl_group_info *rgi) return 0; } -int Intvar_log_event::do_update_pos(struct rpl_group_info *rgi) +int Intvar_log_event::do_update_pos(rpl_group_info *rgi) { Relay_log_info *rli= rgi->rli; rli->inc_event_relay_log_pos(); @@ -6818,7 +6816,7 @@ void Rand_log_event::print(FILE* file, PRINT_EVENT_INFO* print_event_info) #if defined(HAVE_REPLICATION) && !defined(MYSQL_CLIENT) -int Rand_log_event::do_apply_event(struct rpl_group_info *rgi) +int Rand_log_event::do_apply_event(rpl_group_info *rgi) { Relay_log_info const *rli= rgi->rli; /* @@ -6835,7 +6833,7 @@ int Rand_log_event::do_apply_event(struct rpl_group_info *rgi) return 0; } -int Rand_log_event::do_update_pos(struct rpl_group_info *rgi) +int Rand_log_event::do_update_pos(rpl_group_info *rgi) { Relay_log_info *rli= rgi->rli; rli->inc_event_relay_log_pos(); @@ -6950,7 +6948,7 @@ void Xid_log_event::print(FILE* file, PRINT_EVENT_INFO* print_event_info) #if defined(HAVE_REPLICATION) && !defined(MYSQL_CLIENT) -int Xid_log_event::do_apply_event(struct rpl_group_info *rgi) +int Xid_log_event::do_apply_event(rpl_group_info *rgi) { bool res; int err; @@ -7416,7 +7414,7 @@ void User_var_log_event::print(FILE* file, PRINT_EVENT_INFO* print_event_info) */ #if defined(HAVE_REPLICATION) && !defined(MYSQL_CLIENT) -int User_var_log_event::do_apply_event(struct rpl_group_info *rgi) +int User_var_log_event::do_apply_event(rpl_group_info *rgi) { Item *it= 0; CHARSET_INFO *charset; @@ -7505,7 +7503,7 @@ int User_var_log_event::do_apply_event(struct rpl_group_info *rgi) DBUG_RETURN(0); } -int User_var_log_event::do_update_pos(struct rpl_group_info *rgi) +int User_var_log_event::do_update_pos(rpl_group_info *rgi) { Relay_log_info *rli= rgi->rli; rli->inc_event_relay_log_pos(); @@ -7682,7 +7680,7 @@ Slave_log_event::Slave_log_event(const char* buf, #ifndef MYSQL_CLIENT -int Slave_log_event::do_apply_event(struct rpl_group_info *rgi) +int 
Slave_log_event::do_apply_event(rpl_group_info *rgi) { if (mysql_bin_log.is_open()) return mysql_bin_log.write(this); @@ -7726,7 +7724,7 @@ void Stop_log_event::print(FILE* file, PRINT_EVENT_INFO* print_event_info) Start_log_event_v3::do_apply_event(), not here. Because if we come here, the master was sane. */ -int Stop_log_event::do_update_pos(struct rpl_group_info *rgi) +int Stop_log_event::do_update_pos(rpl_group_info *rgi) { Relay_log_info *rli= rgi->rli; /* @@ -7958,7 +7956,7 @@ void Create_file_log_event::pack_info(THD *thd, Protocol *protocol) */ #if defined(HAVE_REPLICATION) && !defined(MYSQL_CLIENT) -int Create_file_log_event::do_apply_event(struct rpl_group_info *rgi) +int Create_file_log_event::do_apply_event(rpl_group_info *rgi) { char proc_info[17+FN_REFLEN+10], *fname_buf; char *ext; @@ -8140,7 +8138,7 @@ int Append_block_log_event::get_create_or_append() const Append_block_log_event::do_apply_event() */ -int Append_block_log_event::do_apply_event(struct rpl_group_info *rgi) +int Append_block_log_event::do_apply_event(rpl_group_info *rgi) { char proc_info[17+FN_REFLEN+10], *fname= proc_info+17; int fd; @@ -8291,7 +8289,7 @@ void Delete_file_log_event::pack_info(THD *thd, Protocol *protocol) */ #if defined(HAVE_REPLICATION) && !defined(MYSQL_CLIENT) -int Delete_file_log_event::do_apply_event(struct rpl_group_info *rgi) +int Delete_file_log_event::do_apply_event(rpl_group_info *rgi) { char fname[FN_REFLEN+10]; Relay_log_info const *rli= rgi->rli; @@ -8391,7 +8389,7 @@ void Execute_load_log_event::pack_info(THD *thd, Protocol *protocol) Execute_load_log_event::do_apply_event() */ -int Execute_load_log_event::do_apply_event(struct rpl_group_info *rgi) +int Execute_load_log_event::do_apply_event(rpl_group_info *rgi) { char fname[FN_REFLEN+10]; char *ext; @@ -8664,7 +8662,7 @@ void Execute_load_query_log_event::pack_info(THD *thd, Protocol *protocol) int -Execute_load_query_log_event::do_apply_event(struct rpl_group_info *rgi) 
+Execute_load_query_log_event::do_apply_event(rpl_group_info *rgi) { char *p; char *buf; @@ -9072,7 +9070,7 @@ int Rows_log_event::do_add_row_data(uchar *row_data, size_t length) #endif #if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) -int Rows_log_event::do_apply_event(struct rpl_group_info *rgi) +int Rows_log_event::do_apply_event(rpl_group_info *rgi) { Relay_log_info const *rli= rgi->rli; DBUG_ENTER("Rows_log_event::do_apply_event(Relay_log_info*)"); @@ -9538,7 +9536,7 @@ static int rows_event_stmt_cleanup(Relay_log_info const *rli, THD * thd) @retval non-zero Error in the statement commit */ int -Rows_log_event::do_update_pos(struct rpl_group_info *rgi) +Rows_log_event::do_update_pos(rpl_group_info *rgi) { Relay_log_info *rli= rgi->rli; DBUG_ENTER("Rows_log_event::do_update_pos"); @@ -9777,7 +9775,7 @@ void Annotate_rows_log_event::print(FILE *file, PRINT_EVENT_INFO *pinfo) #endif #if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) -int Annotate_rows_log_event::do_apply_event(struct rpl_group_info *rgi) +int Annotate_rows_log_event::do_apply_event(rpl_group_info *rgi) { m_save_thd_query_txt= thd->query(); m_save_thd_query_len= thd->query_length(); @@ -9787,7 +9785,7 @@ int Annotate_rows_log_event::do_apply_event(struct rpl_group_info *rgi) #endif #if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) -int Annotate_rows_log_event::do_update_pos(struct rpl_group_info *rgi) +int Annotate_rows_log_event::do_update_pos(rpl_group_info *rgi) { Relay_log_info *rli= rgi->rli; rli->inc_event_relay_log_pos(); @@ -10296,7 +10294,7 @@ check_table_map(Relay_log_info const *rli, RPL_TABLE_LIST *table_list) DBUG_RETURN(res); } -int Table_map_log_event::do_apply_event(struct rpl_group_info *rgi) +int Table_map_log_event::do_apply_event(rpl_group_info *rgi) { RPL_TABLE_LIST *table_list; char *db_mem, *tname_mem; @@ -10415,7 +10413,7 @@ Table_map_log_event::do_shall_skip(Relay_log_info *rli) return continue_group(rli); } -int Table_map_log_event::do_update_pos(struct 
rpl_group_info *rgi) +int Table_map_log_event::do_update_pos(rpl_group_info *rgi) { Relay_log_info *rli= rgi->rli; rli->inc_event_relay_log_pos(); @@ -11847,7 +11845,7 @@ Incident_log_event::print(FILE *file, #if defined(HAVE_REPLICATION) && !defined(MYSQL_CLIENT) int -Incident_log_event::do_apply_event(struct rpl_group_info *rgi) +Incident_log_event::do_apply_event(rpl_group_info *rgi) { Relay_log_info const *rli= rgi->rli; DBUG_ENTER("Incident_log_event::do_apply_event"); diff --git a/sql/log_event.h b/sql/log_event.h index 6d6a330fc48..1dc7f516727 100644 --- a/sql/log_event.h +++ b/sql/log_event.h @@ -1317,7 +1317,7 @@ public: @see do_apply_event */ - int apply_event(struct rpl_group_info *rgi) + int apply_event(rpl_group_info *rgi) { return do_apply_event(rgi); } @@ -1331,7 +1331,7 @@ public: @see do_update_pos */ - int update_pos(struct rpl_group_info *rgi) + int update_pos(rpl_group_info *rgi) { return do_update_pos(rgi); } @@ -1432,7 +1432,7 @@ protected: @retval 0 Event applied successfully @retval errno Error code if event application failed */ - virtual int do_apply_event(struct rpl_group_info *rgi) + virtual int do_apply_event(rpl_group_info *rgi) { return 0; /* Default implementation does nothing */ } @@ -1461,7 +1461,7 @@ protected: 1). Observe that handler errors are returned by the do_apply_event() function, and not by this one. */ - virtual int do_update_pos(struct rpl_group_info *rgi); + virtual int do_update_pos(rpl_group_info *rgi); /** @@ -1986,10 +1986,10 @@ public: public: /* !!! 
Public in this patch to allow old usage */ #if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) virtual enum_skip_reason do_shall_skip(Relay_log_info *rli); - virtual int do_apply_event(struct rpl_group_info *rgi); - virtual int do_update_pos(struct rpl_group_info *rgi); + virtual int do_apply_event(rpl_group_info *rgi); + virtual int do_update_pos(rpl_group_info *rgi); - int do_apply_event(struct rpl_group_info *rgi, + int do_apply_event(rpl_group_info *rgi, const char *query_arg, uint32 q_len_arg); static bool peek_is_commit_rollback(const char *event_start, @@ -2103,7 +2103,7 @@ public: private: #if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) - virtual int do_apply_event(struct rpl_group_info *rgi); + virtual int do_apply_event(rpl_group_info *rgi); #endif }; @@ -2416,12 +2416,12 @@ public: public: /* !!! Public in this patch to allow old usage */ #if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) - virtual int do_apply_event(struct rpl_group_info *rgi) + virtual int do_apply_event(rpl_group_info *rgi) { return do_apply_event(thd->slave_net,rgi,0); } - int do_apply_event(NET *net, struct rpl_group_info *rgi, + int do_apply_event(NET *net, rpl_group_info *rgi, bool use_rli_only_for_errors); #endif }; @@ -2500,7 +2500,7 @@ public: protected: #if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) - virtual int do_apply_event(struct rpl_group_info *rgi); + virtual int do_apply_event(rpl_group_info *rgi); virtual enum_skip_reason do_shall_skip(Relay_log_info*) { /* @@ -2596,8 +2596,8 @@ public: static bool is_version_before_checksum(const master_version_split *version_split); protected: #if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) - virtual int do_apply_event(struct rpl_group_info *rgi); - virtual int do_update_pos(struct rpl_group_info *rgi); + virtual int do_apply_event(rpl_group_info *rgi); + virtual int do_update_pos(rpl_group_info *rgi); virtual enum_skip_reason do_shall_skip(Relay_log_info *rli); #endif }; @@ -2675,8 +2675,8 @@ 
Intvar_log_event(THD* thd_arg,uchar type_arg, ulonglong val_arg, private: #if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) - virtual int do_apply_event(struct rpl_group_info *rgi); - virtual int do_update_pos(struct rpl_group_info *rgi); + virtual int do_apply_event(rpl_group_info *rgi); + virtual int do_update_pos(rpl_group_info *rgi); virtual enum_skip_reason do_shall_skip(Relay_log_info *rli); #endif }; @@ -2754,8 +2754,8 @@ class Rand_log_event: public Log_event private: #if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) - virtual int do_apply_event(struct rpl_group_info *rgi); - virtual int do_update_pos(struct rpl_group_info *rgi); + virtual int do_apply_event(rpl_group_info *rgi); + virtual int do_update_pos(rpl_group_info *rgi); virtual enum_skip_reason do_shall_skip(Relay_log_info *rli); #endif }; @@ -2803,7 +2803,7 @@ class Xid_log_event: public Log_event private: #if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) - virtual int do_apply_event(struct rpl_group_info *rgi); + virtual int do_apply_event(rpl_group_info *rgi); enum_skip_reason do_shall_skip(Relay_log_info *rli); #endif }; @@ -2870,8 +2870,8 @@ public: private: #if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) - virtual int do_apply_event(struct rpl_group_info *rgi); - virtual int do_update_pos(struct rpl_group_info *rgi); + virtual int do_apply_event(rpl_group_info *rgi); + virtual int do_update_pos(rpl_group_info *rgi); virtual enum_skip_reason do_shall_skip(Relay_log_info *rli); #endif }; @@ -2905,7 +2905,7 @@ public: private: #if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) - virtual int do_update_pos(struct rpl_group_info *rgi); + virtual int do_update_pos(rpl_group_info *rgi); virtual enum_skip_reason do_shall_skip(Relay_log_info *rli) { /* @@ -3007,7 +3007,7 @@ public: private: #if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) - virtual int do_update_pos(struct rpl_group_info *rgi); + virtual int do_update_pos(rpl_group_info *rgi); virtual enum_skip_reason 
do_shall_skip(Relay_log_info *rli); #endif }; @@ -3119,8 +3119,8 @@ public: uint16 flags, bool is_transactional, uint64 commit_id); #ifdef HAVE_REPLICATION void pack_info(THD *thd, Protocol *protocol); - virtual int do_apply_event(struct rpl_group_info *rgi); - virtual int do_update_pos(struct rpl_group_info *rgi); + virtual int do_apply_event(rpl_group_info *rgi); + virtual int do_update_pos(rpl_group_info *rgi); virtual enum_skip_reason do_shall_skip(Relay_log_info *rli); #endif #else @@ -3249,7 +3249,7 @@ public: #if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) bool to_packet(String *packet); bool write(IO_CACHE *file); - virtual int do_apply_event(struct rpl_group_info *rgi); + virtual int do_apply_event(rpl_group_info *rgi); #endif static bool peek(const char *event_start, uint32 event_len, uint8 checksum_alg, @@ -3328,7 +3328,7 @@ public: private: #if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) - virtual int do_apply_event(struct rpl_group_info *rgi); + virtual int do_apply_event(rpl_group_info *rgi); #endif }; @@ -3383,7 +3383,7 @@ public: private: #if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) - virtual int do_apply_event(struct rpl_group_info *rgi); + virtual int do_apply_event(rpl_group_info *rgi); #endif }; @@ -3424,7 +3424,7 @@ public: private: #if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) - virtual int do_apply_event(struct rpl_group_info *rgi); + virtual int do_apply_event(rpl_group_info *rgi); #endif }; @@ -3464,7 +3464,7 @@ public: private: #if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) - virtual int do_apply_event(struct rpl_group_info *rgi); + virtual int do_apply_event(rpl_group_info *rgi); #endif }; @@ -3563,7 +3563,7 @@ public: private: #if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) - virtual int do_apply_event(struct rpl_group_info *rgi); + virtual int do_apply_event(rpl_group_info *rgi); #endif }; @@ -3635,8 +3635,8 @@ public: #if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) private: - 
virtual int do_apply_event(struct rpl_group_info *rgi); - virtual int do_update_pos(struct rpl_group_info *rgi); + virtual int do_apply_event(rpl_group_info *rgi); + virtual int do_update_pos(rpl_group_info *rgi); virtual enum_skip_reason do_shall_skip(Relay_log_info*); #endif @@ -4050,8 +4050,8 @@ public: private: #if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) - virtual int do_apply_event(struct rpl_group_info *rgi); - virtual int do_update_pos(struct rpl_group_info *rgi); + virtual int do_apply_event(rpl_group_info *rgi); + virtual int do_update_pos(rpl_group_info *rgi); virtual enum_skip_reason do_shall_skip(Relay_log_info *rli); #endif @@ -4278,8 +4278,8 @@ protected: private: #if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) - virtual int do_apply_event(struct rpl_group_info *rgi); - virtual int do_update_pos(struct rpl_group_info *rgi); + virtual int do_apply_event(rpl_group_info *rgi); + virtual int do_update_pos(rpl_group_info *rgi); virtual enum_skip_reason do_shall_skip(Relay_log_info *rli); /* @@ -4612,7 +4612,7 @@ public: #endif #if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) - virtual int do_apply_event(struct rpl_group_info *rgi); + virtual int do_apply_event(rpl_group_info *rgi); #endif virtual bool write_data_header(IO_CACHE *file); diff --git a/sql/log_event_old.cc b/sql/log_event_old.cc index d3e9d47d64a..db1b3fb5a9f 100644 --- a/sql/log_event_old.cc +++ b/sql/log_event_old.cc @@ -36,7 +36,7 @@ // Old implementation of do_apply_event() int -Old_rows_log_event::do_apply_event(Old_rows_log_event *ev, struct rpl_group_info *rgi) +Old_rows_log_event::do_apply_event(Old_rows_log_event *ev, rpl_group_info *rgi) { DBUG_ENTER("Old_rows_log_event::do_apply_event(st_relay_log_info*)"); int error= 0; @@ -1451,7 +1451,7 @@ int Old_rows_log_event::do_add_row_data(uchar *row_data, size_t length) #if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) -int Old_rows_log_event::do_apply_event(struct rpl_group_info *rgi) +int 
Old_rows_log_event::do_apply_event(rpl_group_info *rgi) { DBUG_ENTER("Old_rows_log_event::do_apply_event(Relay_log_info*)"); int error= 0; @@ -1834,7 +1834,7 @@ Old_rows_log_event::do_shall_skip(Relay_log_info *rli) } int -Old_rows_log_event::do_update_pos(struct rpl_group_info *rgi) +Old_rows_log_event::do_update_pos(rpl_group_info *rgi) { Relay_log_info *rli= rgi->rli; DBUG_ENTER("Old_rows_log_event::do_update_pos"); diff --git a/sql/log_event_old.h b/sql/log_event_old.h index ad51349ef80..7c35b875dc4 100644 --- a/sql/log_event_old.h +++ b/sql/log_event_old.h @@ -214,8 +214,8 @@ protected: private: #if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) - virtual int do_apply_event(struct rpl_group_info *rgi); - virtual int do_update_pos(struct rpl_group_info *rgi); + virtual int do_apply_event(rpl_group_info *rgi); + virtual int do_update_pos(rpl_group_info *rgi); virtual enum_skip_reason do_shall_skip(Relay_log_info *rli); /* @@ -275,7 +275,7 @@ private: #if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) - int do_apply_event(Old_rows_log_event*, struct rpl_group_info *rgi); + int do_apply_event(Old_rows_log_event*, rpl_group_info *rgi); /* Primitive to prepare for a sequence of row executions. 
@@ -403,7 +403,7 @@ private: #if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) // use old definition of do_apply_event() - virtual int do_apply_event(struct rpl_group_info *rgi) + virtual int do_apply_event(rpl_group_info *rgi) { return Old_rows_log_event::do_apply_event(this, rgi); } // primitives for old version of do_apply_event() @@ -481,7 +481,7 @@ private: #if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) // use old definition of do_apply_event() - virtual int do_apply_event(struct rpl_group_info *rgi) + virtual int do_apply_event(rpl_group_info *rgi) { return Old_rows_log_event::do_apply_event(this, rgi); } // primitives for old version of do_apply_event() @@ -556,7 +556,7 @@ private: #if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) // use old definition of do_apply_event() - virtual int do_apply_event(struct rpl_group_info *rgi) + virtual int do_apply_event(rpl_group_info *rgi) { return Old_rows_log_event::do_apply_event(this, rgi); } // primitives for old version of do_apply_event() diff --git a/sql/rpl_gtid.cc b/sql/rpl_gtid.cc index bc826e9bdb5..a1b14ad3255 100644 --- a/sql/rpl_gtid.cc +++ b/sql/rpl_gtid.cc @@ -62,7 +62,7 @@ rpl_slave_state::update_state_hash(uint64 sub_id, rpl_gtid *gtid) int -rpl_slave_state::record_and_update_gtid(THD *thd, struct rpl_group_info *rgi) +rpl_slave_state::record_and_update_gtid(THD *thd, rpl_group_info *rgi) { uint64 sub_id; diff --git a/sql/rpl_parallel.cc b/sql/rpl_parallel.cc index 7970f15eb49..7cf2c9162ff 100644 --- a/sql/rpl_parallel.cc +++ b/sql/rpl_parallel.cc @@ -62,7 +62,7 @@ rpt_handle_event(rpl_parallel_thread::queued_event *qev, struct rpl_parallel_thread *rpt) { int err; - struct rpl_group_info *rgi= qev->rgi; + rpl_group_info *rgi= qev->rgi; Relay_log_info *rli= rgi->rli; THD *thd= rgi->thd; @@ -128,8 +128,9 @@ handle_rpl_parallel_thread(void *arg) old_msg= thd->proc_info; thd->enter_cond(&rpt->COND_rpl_thread, &rpt->LOCK_rpl_thread, "Waiting for work from SQL thread"); - while 
(!rpt->stop && !thd->killed && !(events= rpt->event_queue)) + while (!(events= rpt->event_queue) && !rpt->stop && !thd->killed) mysql_cond_wait(&rpt->COND_rpl_thread, &rpt->LOCK_rpl_thread); + /* Mark that this thread is now executing */ rpt->free= false; rpt->event_queue= rpt->last_in_queue= NULL; thd->exit_cond(old_msg); @@ -145,9 +146,15 @@ handle_rpl_parallel_thread(void *arg) uint64 wait_start_sub_id; bool end_of_group; + /* Handle a new event group, which will be initiated by a GTID event. */ if (event_type == GTID_EVENT) { in_event_group= true; + /* + If the standalone flag is set, then this event group consists of a + single statement (possibly preceded by some Intvar_log_event and + similar), without any terminating COMMIT/ROLLBACK/XID. + */ group_standalone= (0 != (static_cast<Gtid_log_event *>(events->ev)->flags2 & Gtid_log_event::FL_STANDALONE)); @@ -540,12 +547,12 @@ rpl_parallel::wait_for_done() bool -rpl_parallel::do_event(struct rpl_group_info *serial_rgi, Log_event *ev) +rpl_parallel::do_event(rpl_group_info *serial_rgi, Log_event *ev) { rpl_parallel_entry *e; rpl_parallel_thread *cur_thread; rpl_parallel_thread::queued_event *qev; - struct rpl_group_info *rgi; + rpl_group_info *rgi; Relay_log_info *rli= serial_rgi->rli; enum Log_event_type typ; diff --git a/sql/rpl_parallel.h b/sql/rpl_parallel.h index 304263c3477..adbb1a18526 100644 --- a/sql/rpl_parallel.h +++ b/sql/rpl_parallel.h @@ -23,7 +23,7 @@ struct rpl_parallel_thread { struct queued_event { queued_event *next; Log_event *ev; - struct rpl_group_info *rgi; + rpl_group_info *rgi; } *event_queue, *last_in_queue; }; @@ -59,7 +59,7 @@ struct rpl_parallel_entry { mysql_mutex_t LOCK_parallel_entry; mysql_cond_t COND_parallel_entry; uint64 current_sub_id; - struct rpl_group_info *current_group_info; + rpl_group_info *current_group_info; /* The sub_id of the last event group in the previous batch of group-committed transactions.
@@ -78,7 +78,7 @@ struct rpl_parallel { ~rpl_parallel(); rpl_parallel_entry *find(uint32 domain_id); void wait_for_done(); - bool do_event(struct rpl_group_info *serial_rgi, Log_event *ev); + bool do_event(rpl_group_info *serial_rgi, Log_event *ev); }; diff --git a/sql/rpl_rli.cc b/sql/rpl_rli.cc index 73658d10624..49547718230 100644 --- a/sql/rpl_rli.cc +++ b/sql/rpl_rli.cc @@ -1193,7 +1193,7 @@ bool Relay_log_info::cached_charset_compare(char *charset) const void Relay_log_info::stmt_done(my_off_t event_master_log_pos, time_t event_creation_time, THD *thd, - struct rpl_group_info *rgi) + rpl_group_info *rgi) { #ifndef DBUG_OFF extern uint debug_not_change_ts_if_art_event; @@ -1265,6 +1265,11 @@ void Relay_log_info::cleanup_context(THD *thd, bool error) { DBUG_ENTER("Relay_log_info::cleanup_context"); + /* + In parallel replication, different THDs can be used from different + parallel threads. But in single-threaded mode, only the THD of the main + SQL thread is allowed. + */ DBUG_ASSERT(opt_slave_parallel_threads > 0 || sql_thd == thd); /* 1) Instances of Table_map_log_event, if ::do_apply_event() was called on them, @@ -1552,6 +1557,7 @@ event_group_new_gtid(rpl_group_info *rgi, Gtid_log_event *gev) uint64 sub_id= rpl_global_gtid_slave_state.next_subid(gev->domain_id); if (!sub_id) { + /* Out of memory caused hash insertion to fail. */ return 1; } rgi->gtid_sub_id= sub_id; diff --git a/sql/rpl_rli.h b/sql/rpl_rli.h index 91c5c65d33b..4d954d1c8aa 100644 --- a/sql/rpl_rli.h +++ b/sql/rpl_rli.h @@ -422,7 +422,7 @@ public: */ void stmt_done(my_off_t event_log_pos, time_t event_creation_time, THD *thd, - struct rpl_group_info *rgi); + rpl_group_info *rgi); /** @@ -521,10 +521,14 @@ private: /* This is data for various state needed to be kept for the processing of - one event group in the SQL thread. + one event group (transaction) during replication. 
- For single-threaded replication it is linked from the RLI, for parallel - replication it is linked into each event group being executed in parallel. + In single-threaded replication, there will be one global rpl_group_info and + one global Relay_log_info per master connection. They will be linked + together. + + In parallel replication, there will be one rpl_group_info object for + each running thd. All rpl_group_info will share the same Relay_log_info. */ struct rpl_group_info { @@ -555,7 +559,7 @@ struct rpl_group_info for the wrong commit). */ uint64 wait_commit_sub_id; - struct rpl_group_info *wait_commit_group_info; + rpl_group_info *wait_commit_group_info; /* If non-zero, the event group must wait for this sub_id to be committed before the execution of the event group is allowed to start. diff --git a/sql/rpl_utility.cc b/sql/rpl_utility.cc index f734b95edc1..40fda63f396 100644 --- a/sql/rpl_utility.cc +++ b/sql/rpl_utility.cc @@ -1143,7 +1143,7 @@ bool Deferred_log_events::is_empty() return array.elements == 0; } -bool Deferred_log_events::execute(struct rpl_group_info *rgi) +bool Deferred_log_events::execute(rpl_group_info *rgi) { bool res= false; diff --git a/sql/slave.cc b/sql/slave.cc index 777ab9c8468..e0cc595213d 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -3019,7 +3019,7 @@ static int has_temporary_error(THD *thd) ev->update_pos(). 
*/ int apply_event_and_update_pos(Log_event* ev, THD* thd, - struct rpl_group_info *rgi, + rpl_group_info *rgi, rpl_parallel_thread *rpt) { int exec_res= 0; diff --git a/sql/sql_binlog.cc b/sql/sql_binlog.cc index 1b6713f1bc3..04cb4adcb2c 100644 --- a/sql/sql_binlog.cc +++ b/sql/sql_binlog.cc @@ -80,7 +80,7 @@ void mysql_client_binlog_statement(THD* thd) my_bool have_fd_event= TRUE; int err; Relay_log_info *rli; - struct rpl_group_info *rgi; + rpl_group_info *rgi; rli= thd->rli_fake; if (!rli) diff --git a/sql/sql_class.cc b/sql/sql_class.cc index 43d810d27d4..66b28c87ac9 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -5666,6 +5666,10 @@ wait_for_commit::register_wait_for_prior_commit(wait_for_commit *waitee) waiting_for_commit= false; else { + /* + Put ourself at the head of the waitee's list of transactions that must + wait for it to commit first. + */ this->next_subsequent_commit= waitee->subsequent_commits_list; waitee->subsequent_commits_list= this; } @@ -5704,7 +5708,7 @@ wait_for_commit::wait_for_prior_commit2() The waiter needs to lock the waitee to delete itself from the list in unregister_wait_for_prior_commit(). Thus wakeup_subsequent_commits() can not - hold its own lock while locking waiters, lest we deadlock. + hold its own lock while locking waiters, as this could lead to deadlock. So we need to prevent unregister_wait_for_prior_commit() running while wakeup is in progress - otherwise the unregister could complete before the wakeup, @@ -5727,6 +5731,7 @@ wait_for_commit::wait_for_prior_commit2() would not be woken up until next wakeup, which could be potentially much later than necessary. 
*/ + void wait_for_commit::wakeup_subsequent_commits2() { diff --git a/sql/sql_class.h b/sql/sql_class.h index e7f593db62b..c34c100171d 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -1615,7 +1615,7 @@ struct wait_for_commit */ bool waiting_for_commit; /* - Flag set when wakeup_subsequent_commits_running() is active, see commonts + Flag set when wakeup_subsequent_commits_running() is active, see comments on that function for details. */ bool wakeup_subsequent_commits_running; From 5633dd822711a269098bdb127c76c4b1250fcf8d Mon Sep 17 00:00:00 2001 From: unknown Date: Mon, 16 Sep 2013 14:33:49 +0200 Subject: [PATCH 17/41] MDEV-4506: parallel replication. Add a simple test case. Fix bugs found. --- mysql-test/suite/rpl/r/rpl_parallel.result | 45 ++++ mysql-test/suite/rpl/t/rpl_parallel.test | 122 +++++----- mysql-test/suite/rpl/t/rpl_parallel2.test | 70 ------ sql/log_event.cc | 90 ++++---- sql/log_event.h | 16 +- sql/log_event_old.cc | 97 ++++---- sql/log_event_old.h | 24 +- sql/rpl_parallel.cc | 14 +- sql/rpl_record.cc | 10 +- sql/rpl_record.h | 4 +- sql/rpl_record_old.cc | 6 +- sql/rpl_record_old.h | 2 +- sql/rpl_rli.cc | 251 ++++++++++----------- sql/rpl_rli.h | 49 ++-- sql/slave.cc | 18 +- sql/sql_binlog.cc | 2 +- 16 files changed, 409 insertions(+), 411 deletions(-) create mode 100644 mysql-test/suite/rpl/r/rpl_parallel.result delete mode 100644 mysql-test/suite/rpl/t/rpl_parallel2.test diff --git a/mysql-test/suite/rpl/r/rpl_parallel.result b/mysql-test/suite/rpl/r/rpl_parallel.result new file mode 100644 index 00000000000..e60b9406b8e --- /dev/null +++ b/mysql-test/suite/rpl/r/rpl_parallel.result @@ -0,0 +1,45 @@ +include/rpl_init.inc [topology=1->2] +SET @old_parallel_threads=@@GLOBAL.slave_parallel_threads; +SET GLOBAL slave_parallel_threads=10; +ERROR HY000: This operation cannot be performed as you have a running slave ''; run STOP SLAVE '' first +include/stop_slave.inc +SET GLOBAL slave_parallel_threads=10; +CHANGE MASTER TO 
master_use_gtid=slave_pos; +include/start_slave.inc +*** Test long-running query in domain 1 can run in parallel with short queries in domain 0 *** +CREATE TABLE t1 (a int PRIMARY KEY) ENGINE=MyISAM; +CREATE TABLE t2 (a int PRIMARY KEY) ENGINE=InnoDB; +INSERT INTO t1 VALUES (1); +INSERT INTO t2 VALUES (1); +LOCK TABLE t1 WRITE; +SET gtid_domain_id=1; +INSERT INTO t1 VALUES (2); +SET gtid_domain_id=0; +INSERT INTO t2 VALUES (2); +INSERT INTO t2 VALUES (3); +BEGIN; +INSERT INTO t2 VALUES (4); +INSERT INTO t2 VALUES (5); +COMMIT; +INSERT INTO t2 VALUES (6); +SELECT * FROM t2 ORDER by a; +a +1 +2 +3 +4 +5 +6 +SELECT * FROM t1; +a +1 +UNLOCK TABLES; +SELECT * FROM t1 ORDER BY a; +a +1 +2 +include/stop_slave.inc +SET GLOBAL slave_parallel_threads=@old_parallel_threads; +include/start_slave.inc +DROP TABLE t1,t2; +include/rpl_end.inc diff --git a/mysql-test/suite/rpl/t/rpl_parallel.test b/mysql-test/suite/rpl/t/rpl_parallel.test index 3ace346e006..b9ba88489e4 100644 --- a/mysql-test/suite/rpl/t/rpl_parallel.test +++ b/mysql-test/suite/rpl/t/rpl_parallel.test @@ -1,60 +1,74 @@ ---source include/have_binlog_format_statement.inc +--source include/have_innodb.inc +--source include/have_debug.inc +--source include/have_debug_sync.inc +--let $rpl_topology=1->2 +--source include/rpl_init.inc -connect (s1,127.0.0.1,root,,test,$MASTER_MYPORT,); -connect (s2,127.0.0.1,root,,test,$SLAVE_MYPORT,); - ---connection s1 -SELECT @@server_id; -SET sql_log_bin=0; -CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=MyISAM; -SET sql_log_bin=1; - ---connection s2 -SELECT @@server_id; -SET sql_log_bin=0; -CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=MyISAM; -SET sql_log_bin=1; - ---replace_result $MASTER_MYPORT MASTER_PORT -eval CHANGE MASTER TO master_host = '127.0.0.1', master_port = $MASTER_MYPORT, - master_user='root', master_use_gtid=current_pos; - ---connection s1 -SET gtid_domain_id=0; -INSERT INTO t1 VALUES (1); -SET gtid_domain_id=1; -INSERT INTO t1 VALUES (2); -SET gtid_domain_id=2; -INSERT 
INTO t1 VALUES (3); -SET gtid_domain_id=0; -INSERT INTO t1 VALUES (4); -SET gtid_domain_id=1; -INSERT INTO t1 VALUES (5); -SET gtid_domain_id=2; -INSERT INTO t1 VALUES (6); -SET gtid_domain_id=0; -INSERT INTO t1 VALUES (7); -SET gtid_domain_id=1; -INSERT INTO t1 VALUES (8); -SET gtid_domain_id=2; -INSERT INTO t1 VALUES (9); - ---connection s2 -query_vertical SHOW SLAVE STATUS; - ---source include/start_slave.inc -SELECT * FROM t1; +# Test various aspects of parallel replication. +--connection server_2 +SET @old_parallel_threads=@@GLOBAL.slave_parallel_threads; +--error ER_SLAVE_MUST_STOP +SET GLOBAL slave_parallel_threads=10; --source include/stop_slave.inc +SET GLOBAL slave_parallel_threads=10; +CHANGE MASTER TO master_use_gtid=slave_pos; +--source include/start_slave.inc + + +--echo *** Test long-running query in domain 1 can run in parallel with short queries in domain 0 *** + +--connection server_1 +CREATE TABLE t1 (a int PRIMARY KEY) ENGINE=MyISAM; +CREATE TABLE t2 (a int PRIMARY KEY) ENGINE=InnoDB; +INSERT INTO t1 VALUES (1); +INSERT INTO t2 VALUES (1); +--save_master_pos + +--connection server_2 +--sync_with_master + +# Block the table t1 to simulate a replicated query taking a long time. +--connect (con_temp,127.0.0.1,root,,test,$SERVER_MYPORT_2,) +LOCK TABLE t1 WRITE; + +--connection server_1 +SET gtid_domain_id=1; +# This query will be blocked on the slave until UNLOCK TABLES. +INSERT INTO t1 VALUES (2); +SET gtid_domain_id=0; +# These t2 queries can be replicated in parallel with the prior t1 query, as +# they are in a separate replication domain. 
+INSERT INTO t2 VALUES (2); +INSERT INTO t2 VALUES (3); +BEGIN; +INSERT INTO t2 VALUES (4); +INSERT INTO t2 VALUES (5); +COMMIT; +INSERT INTO t2 VALUES (6); + +--connection server_2 +--let $wait_condition= SELECT COUNT(*) = 6 FROM t2 +--source include/wait_condition.inc + +SELECT * FROM t2 ORDER by a; + +--connection con_temp SELECT * FROM t1; +UNLOCK TABLES; ---connection s1 -SET sql_log_bin=0; -DROP TABLE t1; -SET sql_log_bin=1; +--connection server_2 +--let $wait_condition= SELECT COUNT(*) = 2 FROM t1 +--source include/wait_condition.inc ---connection s2 -RESET SLAVE ALL; -SET sql_log_bin=0; -DROP TABLE t1; -SET sql_log_bin=1; +SELECT * FROM t1 ORDER BY a; + +--connection server_2 +--source include/stop_slave.inc +SET GLOBAL slave_parallel_threads=@old_parallel_threads; +--source include/start_slave.inc + +--connection server_1 +DROP TABLE t1,t2; + +--source include/rpl_end.inc diff --git a/mysql-test/suite/rpl/t/rpl_parallel2.test b/mysql-test/suite/rpl/t/rpl_parallel2.test deleted file mode 100644 index b3f970c909a..00000000000 --- a/mysql-test/suite/rpl/t/rpl_parallel2.test +++ /dev/null @@ -1,70 +0,0 @@ ---source include/have_binlog_format_statement.inc ---source include/have_xtradb.inc - -connect (m1,127.0.0.1,root,,test,$MASTER_MYPORT,); -connect (m2,127.0.0.1,root,,test,$MASTER_MYPORT,); -connect (m3,127.0.0.1,root,,test,$MASTER_MYPORT,); -connect (m4,127.0.0.1,root,,test,$MASTER_MYPORT,); -connect (s1,127.0.0.1,root,,test,$SLAVE_MYPORT,); -connect (s2,127.0.0.1,root,,test,$SLAVE_MYPORT,); -connect (s3,127.0.0.1,root,,test,$SLAVE_MYPORT,); -connect (s4,127.0.0.1,root,,test,$SLAVE_MYPORT,); - ---connection m1 -SELECT @@server_id; -SET sql_log_bin=0; -CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=InnoDB; -SET sql_log_bin=1; -SET @old_count= @@GLOBAL.binlog_commit_wait_count; -SET @old_usec= @@GLOBAL.binlog_commit_wait_usec; -SET GLOBAL binlog_commit_wait_usec = 30*1000000; - ---connection s1 -SELECT @@server_id; -SET sql_log_bin=0; -CREATE TABLE t1 (a INT 
PRIMARY KEY) ENGINE=InnoDB; -SET sql_log_bin=1; - ---replace_result $MASTER_MYPORT MASTER_PORT -eval CHANGE MASTER TO master_host = '127.0.0.1', master_port = $MASTER_MYPORT, - master_user='root', master_use_gtid=current_pos; - ---connection m1 -SET GLOBAL binlog_commit_wait_count = 4; - -send INSERT INTO t1 VALUES (1); - ---connection m2 -send INSERT INTO t1 VALUES (2); ---connection m3 -send INSERT INTO t1 VALUES (3); ---connection m4 -INSERT INTO t1 VALUES (4); ---connection m1 -reap; ---connection m2 -reap; ---connection m3 -reap; - ---connection m1 -SHOW BINLOG EVENTS; - ---connection s1 ---source include/start_slave.inc -SELECT * FROM t1; ---source include/stop_slave.inc -SELECT * FROM t1; - ---connection m1 -SET sql_log_bin=0; -DROP TABLE t1; -SET sql_log_bin=1; -SET GLOBAL binlog_commit_wait_count= @old_count; -SET GLOBAL binlog_commit_wait_usec= @old_usec; - ---connection s1 -RESET SLAVE ALL; -SET sql_log_bin=0; -DROP TABLE t1; -SET sql_log_bin=1; diff --git a/sql/log_event.cc b/sql/log_event.cc index c0a2ebfa365..cfbdd6aa626 100644 --- a/sql/log_event.cc +++ b/sql/log_event.cc @@ -130,7 +130,7 @@ const ulong checksum_version_product_mariadb= checksum_version_split_mariadb[2]; #if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) -static int rows_event_stmt_cleanup(Relay_log_info const *rli, THD* thd); +static int rows_event_stmt_cleanup(rpl_group_info *rgi, THD* thd); static const char *HA_ERR(int i) { @@ -3854,7 +3854,7 @@ int Query_log_event::do_apply_event(rpl_group_info *rgi, DBUG_PRINT("info", ("log_pos: %lu", (ulong) log_pos)); clear_all_errors(thd, const_cast(rli)); - if (strcmp("COMMIT", query) == 0 && rli->tables_to_lock) + if (strcmp("COMMIT", query) == 0 && rgi->tables_to_lock) { /* Cleaning-up the last statement context: @@ -3863,7 +3863,7 @@ int Query_log_event::do_apply_event(rpl_group_info *rgi, */ int error; char llbuff[22]; - if ((error= rows_event_stmt_cleanup(const_cast(rli), thd))) + if ((error= rows_event_stmt_cleanup(rgi, thd))) { 
const_cast(rli)->report(ERROR_LEVEL, error, "Error in cleaning up after an event preceeding the commit; " @@ -3883,7 +3883,7 @@ int Query_log_event::do_apply_event(rpl_group_info *rgi, } else { - const_cast(rli)->slave_close_thread_tables(thd); + rgi->slave_close_thread_tables(thd); } /* @@ -4835,7 +4835,7 @@ int Format_description_log_event::do_apply_event(rpl_group_info *rgi) "or ROLLBACK in relay log). A probable cause is that " "the master died while writing the transaction to " "its binary log, thus rolled back too."); - const_cast(rli)->cleanup_context(thd, 1); + rgi->cleanup_context(thd, 1); } /* @@ -5533,7 +5533,7 @@ int Load_log_event::do_apply_event(NET* net, rpl_group_info *rgi, clear_all_errors(thd, const_cast(rli)); /* see Query_log_event::do_apply_event() and BUG#13360 */ - DBUG_ASSERT(!rli->m_table_map.count()); + DBUG_ASSERT(!rgi->m_table_map.count()); /* Usually lex_start() is called by mysql_parse(), but we need it here as the present method does not call mysql_parse(). 
@@ -9089,7 +9089,7 @@ int Rows_log_event::do_apply_event(rpl_group_info *rgi) */ DBUG_ASSERT(get_flags(STMT_END_F)); - const_cast(rli)->slave_close_thread_tables(thd); + rgi->slave_close_thread_tables(thd); thd->clear_error(); DBUG_RETURN(0); } @@ -9151,7 +9151,7 @@ int Rows_log_event::do_apply_event(rpl_group_info *rgi) /* A small test to verify that objects have consistent types */ DBUG_ASSERT(sizeof(thd->variables.option_bits) == sizeof(OPTION_RELAXED_UNIQUE_CHECKS)); - if (open_and_lock_tables(thd, rli->tables_to_lock, FALSE, 0)) + if (open_and_lock_tables(thd, rgi->tables_to_lock, FALSE, 0)) { uint actual_error= thd->stmt_da->sql_errno(); if (thd->is_slave_error || thd->is_fatal_error) @@ -9168,7 +9168,7 @@ int Rows_log_event::do_apply_event(rpl_group_info *rgi) "unexpected success or fatal error")); thd->is_slave_error= 1; } - const_cast(rli)->slave_close_thread_tables(thd); + rgi->slave_close_thread_tables(thd); DBUG_RETURN(actual_error); } @@ -9182,7 +9182,7 @@ int Rows_log_event::do_apply_event(rpl_group_info *rgi) { DBUG_PRINT("debug", ("Checking compability of tables to lock - tables_to_lock: %p", - rli->tables_to_lock)); + rgi->tables_to_lock)); /** When using RBR and MyISAM MERGE tables the base tables that make @@ -9196,8 +9196,8 @@ int Rows_log_event::do_apply_event(rpl_group_info *rgi) NOTE: The base tables are added here are removed when close_thread_tables is called. */ - RPL_TABLE_LIST *ptr= rli->tables_to_lock; - for (uint i= 0 ; ptr && (i < rli->tables_to_lock_count); + RPL_TABLE_LIST *ptr= rgi->tables_to_lock; + for (uint i= 0 ; ptr && (i < rgi->tables_to_lock_count); ptr= static_cast(ptr->next_global), i++) { DBUG_ASSERT(ptr->m_tabledef_valid); @@ -9213,7 +9213,7 @@ int Rows_log_event::do_apply_event(rpl_group_info *rgi) having severe errors which should not be skiped. 
*/ thd->is_slave_error= 1; - const_cast(rli)->slave_close_thread_tables(thd); + rgi->slave_close_thread_tables(thd); DBUG_RETURN(ERR_BAD_TABLE_DEF); } DBUG_PRINT("debug", ("Table: %s.%s is compatible with master" @@ -9238,18 +9238,18 @@ int Rows_log_event::do_apply_event(rpl_group_info *rgi) Rows_log_event, we can invalidate the query cache for the associated table. */ - TABLE_LIST *ptr= rli->tables_to_lock; - for (uint i=0 ; ptr && (i < rli->tables_to_lock_count); ptr= ptr->next_global, i++) - const_cast(rli)->m_table_map.set_table(ptr->table_id, ptr->table); + TABLE_LIST *ptr= rgi->tables_to_lock; + for (uint i=0 ; ptr && (i < rgi->tables_to_lock_count); ptr= ptr->next_global, i++) + rgi->m_table_map.set_table(ptr->table_id, ptr->table); #ifdef HAVE_QUERY_CACHE - query_cache.invalidate_locked_for_write(thd, rli->tables_to_lock); + query_cache.invalidate_locked_for_write(thd, rgi->tables_to_lock); #endif } TABLE* table= - m_table= const_cast(rli)->m_table_map.get_table(m_table_id); + m_table= rgi->m_table_map.get_table(m_table_id); DBUG_PRINT("debug", ("m_table: 0x%lx, m_table_id: %lu", (ulong) m_table, m_table_id)); @@ -9331,7 +9331,7 @@ int Rows_log_event::do_apply_event(rpl_group_info *rgi) if (!table->in_use) table->in_use= thd; - error= do_exec_row(rli); + error= do_exec_row(rgi); if (error) DBUG_PRINT("info", ("error: %s", HA_ERR(error))); @@ -9371,7 +9371,7 @@ int Rows_log_event::do_apply_event(rpl_group_info *rgi) (ulong) m_curr_row, (ulong) m_curr_row_end, (ulong) m_rows_end)); if (!m_curr_row_end && !error) - error= unpack_current_row(rli); + error= unpack_current_row(rgi); // at this moment m_curr_row_end should be set DBUG_ASSERT(error || m_curr_row_end != NULL); @@ -9432,7 +9432,7 @@ int Rows_log_event::do_apply_event(rpl_group_info *rgi) DBUG_RETURN(error); } - if (get_flags(STMT_END_F) && (error= rows_event_stmt_cleanup(rli, thd))) + if (get_flags(STMT_END_F) && (error= rows_event_stmt_cleanup(rgi, thd))) slave_rows_error_report(ERROR_LEVEL, 
thd->is_error() ? 0 : error, rli, thd, table, @@ -9466,7 +9466,7 @@ Rows_log_event::do_shall_skip(Relay_log_info *rli) @retval non-zero Error at the commit. */ -static int rows_event_stmt_cleanup(Relay_log_info const *rli, THD * thd) +static int rows_event_stmt_cleanup(rpl_group_info *rgi, THD * thd) { int error; { @@ -9520,7 +9520,7 @@ static int rows_event_stmt_cleanup(Relay_log_info const *rli, THD * thd) */ thd->reset_current_stmt_binlog_format_row(); - const_cast(rli)->cleanup_context(thd, 0); + rgi->cleanup_context(thd, 0); } return error; } @@ -10259,10 +10259,11 @@ enum enum_tbl_map_status rli->tables_to_lock. */ static enum_tbl_map_status -check_table_map(Relay_log_info const *rli, RPL_TABLE_LIST *table_list) +check_table_map(rpl_group_info *rgi, RPL_TABLE_LIST *table_list) { DBUG_ENTER("check_table_map"); enum_tbl_map_status res= OK_TO_PROCESS; + Relay_log_info *rli= rgi->rli; if (rli->sql_thd->slave_thread /* filtering is for slave only */ && (!rli->mi->rpl_filter->db_ok(table_list->db) || @@ -10270,8 +10271,8 @@ check_table_map(Relay_log_info const *rli, RPL_TABLE_LIST *table_list) res= FILTERED_OUT; else { - RPL_TABLE_LIST *ptr= static_cast(rli->tables_to_lock); - for(uint i=0 ; ptr && (i< rli->tables_to_lock_count); + RPL_TABLE_LIST *ptr= static_cast(rgi->tables_to_lock); + for(uint i=0 ; ptr && (i< rgi->tables_to_lock_count); ptr= static_cast(ptr->next_local), i++) { if (ptr->table_id == table_list->table_id) @@ -10303,7 +10304,6 @@ int Table_map_log_event::do_apply_event(rpl_group_info *rgi) Rpl_filter *filter; Relay_log_info const *rli= rgi->rli; DBUG_ENTER("Table_map_log_event::do_apply_event(Relay_log_info*)"); - DBUG_ASSERT(rli->sql_thd == thd); /* Step the query id to mark what columns that are actually used. 
*/ thd->set_query_id(next_query_id()); @@ -10328,7 +10328,7 @@ int Table_map_log_event::do_apply_event(rpl_group_info *rgi) table_list->updating= 1; table_list->required_type= FRMTYPE_TABLE; DBUG_PRINT("debug", ("table: %s is mapped to %u", table_list->table_name, table_list->table_id)); - enum_tbl_map_status tblmap_status= check_table_map(rli, table_list); + enum_tbl_map_status tblmap_status= check_table_map(rgi, table_list); if (tblmap_status == OK_TO_PROCESS) { DBUG_ASSERT(thd->lex->query_tables != table_list); @@ -10354,9 +10354,9 @@ int Table_map_log_event::do_apply_event(rpl_group_info *rgi) We record in the slave's information that the table should be locked by linking the table into the list of tables to lock. */ - table_list->next_global= table_list->next_local= rli->tables_to_lock; - const_cast(rli)->tables_to_lock= table_list; - const_cast(rli)->tables_to_lock_count++; + table_list->next_global= table_list->next_local= rgi->tables_to_lock; + rgi->tables_to_lock= table_list; + rgi->tables_to_lock_count++; /* 'memory' is freed in clear_tables_to_lock */ } else // FILTERED_OUT, SAME_ID_MAPPING_* @@ -10709,7 +10709,7 @@ is_duplicate_key_error(int errcode) */ int -Rows_log_event::write_row(const Relay_log_info *const rli, +Rows_log_event::write_row(rpl_group_info *rgi, const bool overwrite) { DBUG_ENTER("write_row"); @@ -10724,7 +10724,7 @@ Rows_log_event::write_row(const Relay_log_info *const rli, table->file->ht->db_type != DB_TYPE_NDBCLUSTER); /* unpack row into table->record[0] */ - if ((error= unpack_current_row(rli))) + if ((error= unpack_current_row(rgi))) DBUG_RETURN(error); if (m_curr_row == m_rows_buf) @@ -10841,7 +10841,7 @@ Rows_log_event::write_row(const Relay_log_info *const rli, if (!get_flags(COMPLETE_ROWS_F)) { restore_record(table,record[1]); - error= unpack_current_row(rli); + error= unpack_current_row(rgi); } #ifndef DBUG_OFF @@ -10907,10 +10907,10 @@ Rows_log_event::write_row(const Relay_log_info *const rli, #endif int 
-Write_rows_log_event::do_exec_row(const Relay_log_info *const rli) +Write_rows_log_event::do_exec_row(rpl_group_info *rgi) { DBUG_ASSERT(m_table != NULL); - int error= write_row(rli, slave_exec_mode == SLAVE_EXEC_MODE_IDEMPOTENT); + int error= write_row(rgi, slave_exec_mode == SLAVE_EXEC_MODE_IDEMPOTENT); if (error && !thd->is_error()) { @@ -11214,7 +11214,7 @@ void issue_long_find_row_warning(Log_event_type type, for any following update/delete command. */ -int Rows_log_event::find_row(const Relay_log_info *rli) +int Rows_log_event::find_row(rpl_group_info *rgi) { DBUG_ENTER("Rows_log_event::find_row"); @@ -11232,7 +11232,7 @@ int Rows_log_event::find_row(const Relay_log_info *rli) */ prepare_record(table, m_width, FALSE); - error= unpack_current_row(rli); + error= unpack_current_row(rgi); #ifndef DBUG_OFF DBUG_PRINT("info",("looking for the following record")); @@ -11497,7 +11497,7 @@ int Rows_log_event::find_row(const Relay_log_info *rli) end: if (is_table_scan || is_index_scan) issue_long_find_row_warning(get_type_code(), m_table->alias.c_ptr(), - is_index_scan, rli); + is_index_scan, rgi->rli); table->default_column_bitmaps(); DBUG_RETURN(error); } @@ -11565,12 +11565,12 @@ Delete_rows_log_event::do_after_row_operations(const Slave_reporting_capability return error; } -int Delete_rows_log_event::do_exec_row(const Relay_log_info *const rli) +int Delete_rows_log_event::do_exec_row(rpl_group_info *rgi) { int error; DBUG_ASSERT(m_table != NULL); - if (!(error= find_row(rli))) + if (!(error= find_row(rgi))) { /* Delete the record found, located in record[0] @@ -11691,11 +11691,11 @@ Update_rows_log_event::do_after_row_operations(const Slave_reporting_capability } int -Update_rows_log_event::do_exec_row(const Relay_log_info *const rli) +Update_rows_log_event::do_exec_row(rpl_group_info *rgi) { DBUG_ASSERT(m_table != NULL); - int error= find_row(rli); + int error= find_row(rgi); if (error) { /* @@ -11703,7 +11703,7 @@ Update_rows_log_event::do_exec_row(const 
Relay_log_info *const rli) able to skip to the next pair of updates */ m_curr_row= m_curr_row_end; - unpack_current_row(rli); + unpack_current_row(rgi); return error; } @@ -11722,7 +11722,7 @@ Update_rows_log_event::do_exec_row(const Relay_log_info *const rli) m_curr_row= m_curr_row_end; /* this also updates m_curr_row_end */ - if ((error= unpack_current_row(rli))) + if ((error= unpack_current_row(rgi))) goto err; /* diff --git a/sql/log_event.h b/sql/log_event.h index 1dc7f516727..d689ebcd582 100644 --- a/sql/log_event.h +++ b/sql/log_event.h @@ -4256,16 +4256,16 @@ protected: uint m_key_nr; /* Key number */ int find_key(); // Find a best key to use in find_row() - int find_row(const Relay_log_info *const); - int write_row(const Relay_log_info *const, const bool); + int find_row(rpl_group_info *); + int write_row(rpl_group_info *, const bool); // Unpack the current row into m_table->record[0] - int unpack_current_row(const Relay_log_info *const rli) + int unpack_current_row(rpl_group_info *rgi) { DBUG_ASSERT(m_table); ASSERT_OR_RETURN_ERROR(m_curr_row < m_rows_end, HA_ERR_CORRUPT_EVENT); - int const result= ::unpack_row(rli, m_table, m_width, m_curr_row, + int const result= ::unpack_row(rgi, m_table, m_width, m_curr_row, m_rows_end, &m_cols, &m_curr_row_end, &m_master_reclength); if (m_curr_row_end > m_rows_end) @@ -4331,7 +4331,7 @@ private: 0 if execution succeeded, 1 if execution failed. 
*/ - virtual int do_exec_row(const Relay_log_info *const rli) = 0; + virtual int do_exec_row(rpl_group_info *rli) = 0; #endif /* defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) */ friend class Old_rows_log_event; @@ -4387,7 +4387,7 @@ private: #if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) virtual int do_before_row_operations(const Slave_reporting_capability *const); virtual int do_after_row_operations(const Slave_reporting_capability *const,int); - virtual int do_exec_row(const Relay_log_info *const); + virtual int do_exec_row(rpl_group_info *); #endif }; @@ -4461,7 +4461,7 @@ protected: #if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) virtual int do_before_row_operations(const Slave_reporting_capability *const); virtual int do_after_row_operations(const Slave_reporting_capability *const,int); - virtual int do_exec_row(const Relay_log_info *const); + virtual int do_exec_row(rpl_group_info *); #endif /* defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) */ }; @@ -4526,7 +4526,7 @@ protected: #if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) virtual int do_before_row_operations(const Slave_reporting_capability *const); virtual int do_after_row_operations(const Slave_reporting_capability *const,int); - virtual int do_exec_row(const Relay_log_info *const); + virtual int do_exec_row(rpl_group_info *); #endif }; diff --git a/sql/log_event_old.cc b/sql/log_event_old.cc index db1b3fb5a9f..58f299dabe7 100644 --- a/sql/log_event_old.cc +++ b/sql/log_event_old.cc @@ -58,7 +58,7 @@ Old_rows_log_event::do_apply_event(Old_rows_log_event *ev, rpl_group_info *rgi) */ DBUG_ASSERT(ev->get_flags(Old_rows_log_event::STMT_END_F)); - const_cast(rli)->slave_close_thread_tables(ev_thd); + rgi->slave_close_thread_tables(ev_thd); ev_thd->clear_error(); DBUG_RETURN(0); } @@ -98,7 +98,7 @@ Old_rows_log_event::do_apply_event(Old_rows_log_event *ev, rpl_group_info *rgi) */ ev_thd->lex->set_stmt_row_injection(); - if (open_and_lock_tables(ev_thd, rli->tables_to_lock, 
FALSE, 0)) + if (open_and_lock_tables(ev_thd, rgi->tables_to_lock, FALSE, 0)) { uint actual_error= ev_thd->stmt_da->sql_errno(); if (ev_thd->is_slave_error || ev_thd->is_fatal_error) @@ -113,7 +113,7 @@ Old_rows_log_event::do_apply_event(Old_rows_log_event *ev, rpl_group_info *rgi) "unexpected success or fatal error")); ev_thd->is_slave_error= 1; } - const_cast(rli)->slave_close_thread_tables(thd); + rgi->slave_close_thread_tables(thd); DBUG_RETURN(actual_error); } @@ -126,8 +126,8 @@ Old_rows_log_event::do_apply_event(Old_rows_log_event *ev, rpl_group_info *rgi) */ { - RPL_TABLE_LIST *ptr= rli->tables_to_lock; - for (uint i= 0 ; ptr&& (i< rli->tables_to_lock_count); + RPL_TABLE_LIST *ptr= rgi->tables_to_lock; + for (uint i= 0 ; ptr&& (i< rgi->tables_to_lock_count); ptr= static_cast(ptr->next_global), i++) { DBUG_ASSERT(ptr->m_tabledef_valid); @@ -136,7 +136,7 @@ Old_rows_log_event::do_apply_event(Old_rows_log_event *ev, rpl_group_info *rgi) ptr->table, &conv_table)) { ev_thd->is_slave_error= 1; - const_cast(rli)->slave_close_thread_tables(ev_thd); + rgi->slave_close_thread_tables(ev_thd); DBUG_RETURN(Old_rows_log_event::ERR_BAD_TABLE_DEF); } DBUG_PRINT("debug", ("Table: %s.%s is compatible with master" @@ -161,15 +161,15 @@ Old_rows_log_event::do_apply_event(Old_rows_log_event *ev, rpl_group_info *rgi) Old_rows_log_event, we can invalidate the query cache for the associated table. 
*/ - TABLE_LIST *ptr= rli->tables_to_lock; - for (uint i=0; ptr && (i < rli->tables_to_lock_count); ptr= ptr->next_global, i++) - const_cast(rli)->m_table_map.set_table(ptr->table_id, ptr->table); + TABLE_LIST *ptr= rgi->tables_to_lock; + for (uint i=0; ptr && (i < rgi->tables_to_lock_count); ptr= ptr->next_global, i++) + rgi->m_table_map.set_table(ptr->table_id, ptr->table); #ifdef HAVE_QUERY_CACHE - query_cache.invalidate_locked_for_write(thd, rli->tables_to_lock); + query_cache.invalidate_locked_for_write(thd, rgi->tables_to_lock); #endif } - TABLE* table= const_cast(rli)->m_table_map.get_table(ev->m_table_id); + TABLE* table= rgi->m_table_map.get_table(ev->m_table_id); if (table) { @@ -220,7 +220,7 @@ Old_rows_log_event::do_apply_event(Old_rows_log_event *ev, rpl_group_info *rgi) while (error == 0 && row_start < ev->m_rows_end) { uchar const *row_end= NULL; - if ((error= do_prepare_row(ev_thd, rli, table, row_start, &row_end))) + if ((error= do_prepare_row(ev_thd, rgi, table, row_start, &row_end))) break; // We should perform the after-row operation even in // the case of error @@ -280,7 +280,7 @@ Old_rows_log_event::do_apply_event(Old_rows_log_event *ev, rpl_group_info *rgi) rollback at the caller along with sbr. 
*/ ev_thd->reset_current_stmt_binlog_format_row(); - const_cast(rli)->cleanup_context(ev_thd, error); + rgi->cleanup_context(ev_thd, error); ev_thd->is_slave_error= 1; DBUG_RETURN(error); } @@ -953,7 +953,7 @@ int Write_rows_log_event_old::do_after_row_operations(TABLE *table, int error) int Write_rows_log_event_old::do_prepare_row(THD *thd_arg, - Relay_log_info const *rli, + rpl_group_info *rgi, TABLE *table, uchar const *row_start, uchar const **row_end) @@ -962,7 +962,7 @@ Write_rows_log_event_old::do_prepare_row(THD *thd_arg, DBUG_ASSERT(row_start && row_end); int error; - error= unpack_row_old(const_cast(rli), + error= unpack_row_old(rgi, table, m_width, table->record[0], row_start, m_rows_end, &m_cols, row_end, &m_master_reclength, @@ -1037,7 +1037,7 @@ int Delete_rows_log_event_old::do_after_row_operations(TABLE *table, int error) int Delete_rows_log_event_old::do_prepare_row(THD *thd_arg, - Relay_log_info const *rli, + rpl_group_info *rgi, TABLE *table, uchar const *row_start, uchar const **row_end) @@ -1050,7 +1050,7 @@ Delete_rows_log_event_old::do_prepare_row(THD *thd_arg, */ DBUG_ASSERT(table->s->fields >= m_width); - error= unpack_row_old(const_cast(rli), + error= unpack_row_old(rgi, table, m_width, table->record[0], row_start, m_rows_end, &m_cols, row_end, &m_master_reclength, @@ -1134,7 +1134,7 @@ int Update_rows_log_event_old::do_after_row_operations(TABLE *table, int error) int Update_rows_log_event_old::do_prepare_row(THD *thd_arg, - Relay_log_info const *rli, + rpl_group_info *rgi, TABLE *table, uchar const *row_start, uchar const **row_end) @@ -1148,14 +1148,14 @@ int Update_rows_log_event_old::do_prepare_row(THD *thd_arg, DBUG_ASSERT(table->s->fields >= m_width); /* record[0] is the before image for the update */ - error= unpack_row_old(const_cast(rli), + error= unpack_row_old(rgi, table, m_width, table->record[0], row_start, m_rows_end, &m_cols, row_end, &m_master_reclength, table->read_set, PRE_GA_UPDATE_ROWS_EVENT); row_start = *row_end; /* 
m_after_image is the after image for the update */ - error= unpack_row_old(const_cast(rli), + error= unpack_row_old(rgi, table, m_width, m_after_image, row_start, m_rows_end, &m_cols, row_end, &m_master_reclength, @@ -1471,7 +1471,7 @@ int Old_rows_log_event::do_apply_event(rpl_group_info *rgi) */ DBUG_ASSERT(get_flags(STMT_END_F)); - const_cast(rli)->slave_close_thread_tables(thd); + rgi->slave_close_thread_tables(thd); thd->clear_error(); DBUG_RETURN(0); } @@ -1499,8 +1499,8 @@ int Old_rows_log_event::do_apply_event(rpl_group_info *rgi) */ lex_start(thd); - if ((error= lock_tables(thd, rli->tables_to_lock, - rli->tables_to_lock_count, 0))) + if ((error= lock_tables(thd, rgi->tables_to_lock, + rgi->tables_to_lock_count, 0))) { if (thd->is_slave_error || thd->is_fatal_error) { @@ -1522,7 +1522,7 @@ int Old_rows_log_event::do_apply_event(rpl_group_info *rgi) "Error in %s event: when locking tables", get_type_str()); } - const_cast(rli)->slave_close_thread_tables(thd); + rgi->slave_close_thread_tables(thd); DBUG_RETURN(error); } @@ -1535,8 +1535,8 @@ int Old_rows_log_event::do_apply_event(rpl_group_info *rgi) */ { - RPL_TABLE_LIST *ptr= rli->tables_to_lock; - for (uint i= 0 ; ptr&& (i< rli->tables_to_lock_count); + RPL_TABLE_LIST *ptr= rgi->tables_to_lock; + for (uint i= 0 ; ptr&& (i< rgi->tables_to_lock_count); ptr= static_cast(ptr->next_global), i++) { TABLE *conv_table; @@ -1544,7 +1544,7 @@ int Old_rows_log_event::do_apply_event(rpl_group_info *rgi) ptr->table, &conv_table)) { thd->is_slave_error= 1; - const_cast(rli)->slave_close_thread_tables(thd); + rgi->slave_close_thread_tables(thd); DBUG_RETURN(ERR_BAD_TABLE_DEF); } ptr->m_conv_table= conv_table; @@ -1566,18 +1566,18 @@ int Old_rows_log_event::do_apply_event(rpl_group_info *rgi) Old_rows_log_event, we can invalidate the query cache for the associated table. 
*/ - for (TABLE_LIST *ptr= rli->tables_to_lock ; ptr ; ptr= ptr->next_global) + for (TABLE_LIST *ptr= rgi->tables_to_lock ; ptr ; ptr= ptr->next_global) { - const_cast(rli)->m_table_map.set_table(ptr->table_id, ptr->table); + rgi->m_table_map.set_table(ptr->table_id, ptr->table); } #ifdef HAVE_QUERY_CACHE - query_cache.invalidate_locked_for_write(thd, rli->tables_to_lock); + query_cache.invalidate_locked_for_write(thd, rgi->tables_to_lock); #endif } TABLE* table= - m_table= const_cast(rli)->m_table_map.get_table(m_table_id); + m_table= rgi->m_table_map.get_table(m_table_id); if (table) { @@ -1657,7 +1657,7 @@ int Old_rows_log_event::do_apply_event(rpl_group_info *rgi) if (!table->in_use) table->in_use= thd; - error= do_exec_row(rli); + error= do_exec_row(rgi); DBUG_PRINT("info", ("error: %d", error)); DBUG_ASSERT(error != HA_ERR_RECORD_DELETED); @@ -1696,7 +1696,7 @@ int Old_rows_log_event::do_apply_event(rpl_group_info *rgi) (ulong) m_curr_row, (ulong) m_curr_row_end, (ulong) m_rows_end)); if (!m_curr_row_end && !error) - unpack_current_row(rli); + unpack_current_row(rgi); // at this moment m_curr_row_end should be set DBUG_ASSERT(error || m_curr_row_end != NULL); @@ -1733,7 +1733,7 @@ int Old_rows_log_event::do_apply_event(rpl_group_info *rgi) rollback at the caller along with sbr. 
*/ thd->reset_current_stmt_binlog_format_row(); - const_cast(rli)->cleanup_context(thd, error); + rgi->cleanup_context(thd, error); thd->is_slave_error= 1; DBUG_RETURN(error); } @@ -1812,7 +1812,7 @@ int Old_rows_log_event::do_apply_event(rpl_group_info *rgi) */ thd->reset_current_stmt_binlog_format_row(); - const_cast(rli)->cleanup_context(thd, 0); + rgi->cleanup_context(thd, 0); } DBUG_RETURN(error); @@ -1998,8 +1998,7 @@ void Old_rows_log_event::print_helper(FILE *file, */ int -Old_rows_log_event::write_row(const Relay_log_info *const rli, - const bool overwrite) +Old_rows_log_event::write_row(rpl_group_info *rgi, const bool overwrite) { DBUG_ENTER("write_row"); DBUG_ASSERT(m_table != NULL && thd != NULL); @@ -2016,7 +2015,7 @@ Old_rows_log_event::write_row(const Relay_log_info *const rli, DBUG_RETURN(error); /* unpack row into table->record[0] */ - error= unpack_current_row(rli); // TODO: how to handle errors? + error= unpack_current_row(rgi); // TODO: how to handle errors? #ifndef DBUG_OFF DBUG_DUMP("record[0]", table->record[0], table->s->reclength); @@ -2123,7 +2122,7 @@ Old_rows_log_event::write_row(const Relay_log_info *const rli, if (!get_flags(COMPLETE_ROWS_F)) { restore_record(table,record[1]); - error= unpack_current_row(rli); + error= unpack_current_row(rgi); } #ifndef DBUG_OFF @@ -2218,7 +2217,7 @@ Old_rows_log_event::write_row(const Relay_log_info *const rli, for any following update/delete command. */ -int Old_rows_log_event::find_row(const Relay_log_info *rli) +int Old_rows_log_event::find_row(rpl_group_info *rgi) { DBUG_ENTER("find_row"); @@ -2231,7 +2230,7 @@ int Old_rows_log_event::find_row(const Relay_log_info *rli) // TODO: shall we check and report errors here? 
prepare_record(table, m_width, FALSE /* don't check errors */); - error= unpack_current_row(rli); + error= unpack_current_row(rgi); #ifndef DBUG_OFF DBUG_PRINT("info",("looking for the following record")); @@ -2603,10 +2602,10 @@ Write_rows_log_event_old::do_after_row_operations(const Slave_reporting_capabili int -Write_rows_log_event_old::do_exec_row(const Relay_log_info *const rli) +Write_rows_log_event_old::do_exec_row(rpl_group_info *rgi) { DBUG_ASSERT(m_table != NULL); - int error= write_row(rli, TRUE /* overwrite */); + int error= write_row(rgi, TRUE /* overwrite */); if (error && !thd->net.last_errno) thd->net.last_errno= error; @@ -2705,12 +2704,12 @@ Delete_rows_log_event_old::do_after_row_operations(const Slave_reporting_capabil } -int Delete_rows_log_event_old::do_exec_row(const Relay_log_info *const rli) +int Delete_rows_log_event_old::do_exec_row(rpl_group_info *rgi) { int error; DBUG_ASSERT(m_table != NULL); - if (!(error= find_row(rli))) + if (!(error= find_row(rgi))) { /* Delete the record found, located in record[0] @@ -2804,11 +2803,11 @@ Update_rows_log_event_old::do_after_row_operations(const Slave_reporting_capabil int -Update_rows_log_event_old::do_exec_row(const Relay_log_info *const rli) +Update_rows_log_event_old::do_exec_row(rpl_group_info *rgi) { DBUG_ASSERT(m_table != NULL); - int error= find_row(rli); + int error= find_row(rgi); if (error) { /* @@ -2816,7 +2815,7 @@ Update_rows_log_event_old::do_exec_row(const Relay_log_info *const rli) able to skip to the next pair of updates */ m_curr_row= m_curr_row_end; - unpack_current_row(rli); + unpack_current_row(rgi); return error; } @@ -2834,7 +2833,7 @@ Update_rows_log_event_old::do_exec_row(const Relay_log_info *const rli) store_record(m_table,record[1]); m_curr_row= m_curr_row_end; - error= unpack_current_row(rli); // this also updates m_curr_row_end + error= unpack_current_row(rgi); // this also updates m_curr_row_end /* Now we have the right row to update. 
The old row (the one we're diff --git a/sql/log_event_old.h b/sql/log_event_old.h index 7c35b875dc4..01b80439fa1 100644 --- a/sql/log_event_old.h +++ b/sql/log_event_old.h @@ -195,15 +195,15 @@ protected: const uchar *m_curr_row_end; /* One-after the end of the current row */ uchar *m_key; /* Buffer to keep key value during searches */ - int find_row(const Relay_log_info *const); - int write_row(const Relay_log_info *const, const bool); + int find_row(rpl_group_info *); + int write_row(rpl_group_info *, const bool); // Unpack the current row into m_table->record[0] - int unpack_current_row(const Relay_log_info *const rli) + int unpack_current_row(rpl_group_info *rgi) { DBUG_ASSERT(m_table); ASSERT_OR_RETURN_ERROR(m_curr_row < m_rows_end, HA_ERR_CORRUPT_EVENT); - int const result= ::unpack_row(rli, m_table, m_width, m_curr_row, + int const result= ::unpack_row(rgi, m_table, m_width, m_curr_row, m_rows_end, &m_cols, &m_curr_row_end, &m_master_reclength); ASSERT_OR_RETURN_ERROR(m_curr_row_end <= m_rows_end, HA_ERR_CORRUPT_EVENT); @@ -267,7 +267,7 @@ private: 0 if execution succeeded, 1 if execution failed. */ - virtual int do_exec_row(const Relay_log_info *const rli) = 0; + virtual int do_exec_row(rpl_group_info *rgi) = 0; #endif /* !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) */ /********** END OF CUT & PASTE FROM Rows_log_event **********/ @@ -324,7 +324,7 @@ private: RETURN VALUE Error code, if something went wrong, 0 otherwise. 
*/ - virtual int do_prepare_row(THD*, Relay_log_info const*, TABLE*, + virtual int do_prepare_row(THD*, rpl_group_info*, TABLE*, uchar const *row_start, uchar const **row_end) = 0; @@ -387,7 +387,7 @@ private: #if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) virtual int do_before_row_operations(const Slave_reporting_capability *const); virtual int do_after_row_operations(const Slave_reporting_capability *const,int); - virtual int do_exec_row(const Relay_log_info *const); + virtual int do_exec_row(rpl_group_info *); #endif /********** END OF CUT & PASTE FROM Write_rows_log_event **********/ @@ -409,7 +409,7 @@ private: // primitives for old version of do_apply_event() virtual int do_before_row_operations(TABLE *table); virtual int do_after_row_operations(TABLE *table, int error); - virtual int do_prepare_row(THD*, Relay_log_info const*, TABLE*, + virtual int do_prepare_row(THD*, rpl_group_info*, TABLE*, uchar const *row_start, uchar const **row_end); virtual int do_exec_row(TABLE *table); @@ -463,7 +463,7 @@ protected: #if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) virtual int do_before_row_operations(const Slave_reporting_capability *const); virtual int do_after_row_operations(const Slave_reporting_capability *const,int); - virtual int do_exec_row(const Relay_log_info *const); + virtual int do_exec_row(rpl_group_info *); #endif /* !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) */ /********** END OF CUT & PASTE FROM Update_rows_log_event **********/ @@ -487,7 +487,7 @@ private: // primitives for old version of do_apply_event() virtual int do_before_row_operations(TABLE *table); virtual int do_after_row_operations(TABLE *table, int error); - virtual int do_prepare_row(THD*, Relay_log_info const*, TABLE*, + virtual int do_prepare_row(THD*, rpl_group_info*, TABLE*, uchar const *row_start, uchar const **row_end); virtual int do_exec_row(TABLE *table); #endif /* !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) */ @@ -538,7 +538,7 @@ protected: #if 
!defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) virtual int do_before_row_operations(const Slave_reporting_capability *const); virtual int do_after_row_operations(const Slave_reporting_capability *const,int); - virtual int do_exec_row(const Relay_log_info *const); + virtual int do_exec_row(rpl_group_info *); #endif /********** END CUT & PASTE FROM Delete_rows_log_event **********/ @@ -562,7 +562,7 @@ private: // primitives for old version of do_apply_event() virtual int do_before_row_operations(TABLE *table); virtual int do_after_row_operations(TABLE *table, int error); - virtual int do_prepare_row(THD*, Relay_log_info const*, TABLE*, + virtual int do_prepare_row(THD*, rpl_group_info*, TABLE*, uchar const *row_start, uchar const **row_end); virtual int do_exec_row(TABLE *table); #endif diff --git a/sql/rpl_parallel.cc b/sql/rpl_parallel.cc index 7cf2c9162ff..b4c1f6c941a 100644 --- a/sql/rpl_parallel.cc +++ b/sql/rpl_parallel.cc @@ -72,6 +72,7 @@ rpt_handle_event(rpl_parallel_thread::queued_event *qev, /* ToDo: Access to thd, and what about rli, split out a parallel part? */ mysql_mutex_lock(&rli->data_lock); err= apply_event_and_update_pos(qev->ev, thd, rgi, rpt); + thd->rgi_slave= NULL; /* ToDo: error handling. 
*/ } @@ -487,12 +488,22 @@ rpl_parallel_thread_pool::get_thread(rpl_parallel_entry *entry) } +static void +free_rpl_parallel_entry(void *element) +{ + rpl_parallel_entry *e= (rpl_parallel_entry *)element; + mysql_cond_destroy(&e->COND_parallel_entry); + mysql_mutex_destroy(&e->LOCK_parallel_entry); + my_free(e); +} + + rpl_parallel::rpl_parallel() : current(NULL) { my_hash_init(&domain_hash, &my_charset_bin, 32, offsetof(rpl_parallel_entry, domain_id), sizeof(uint32), - NULL, NULL, HASH_UNIQUE); + NULL, free_rpl_parallel_entry, HASH_UNIQUE); } @@ -667,6 +678,7 @@ rpl_parallel::do_event(rpl_group_info *serial_rgi, Log_event *ev) qev->rgi= serial_rgi; rpt_handle_event(qev, NULL); delete_or_keep_event_post_apply(serial_rgi, typ, qev->ev); + my_free(qev); return false; } diff --git a/sql/rpl_record.cc b/sql/rpl_record.cc index 99bf8a82004..12df72a251b 100644 --- a/sql/rpl_record.cc +++ b/sql/rpl_record.cc @@ -186,7 +186,7 @@ pack_row(TABLE *table, MY_BITMAP const* cols, */ #if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) int -unpack_row(Relay_log_info const *rli, +unpack_row(rpl_group_info *rgi, TABLE *table, uint const colcnt, uchar const *const row_data, uchar const *const row_buffer_end, MY_BITMAP const *cols, @@ -214,18 +214,18 @@ unpack_row(Relay_log_info const *rli, uint i= 0; table_def *tabledef= NULL; TABLE *conv_table= NULL; - bool table_found= rli && rli->get_table_data(table, &tabledef, &conv_table); + bool table_found= rgi && rgi->get_table_data(table, &tabledef, &conv_table); DBUG_PRINT("debug", ("Table data: table_found: %d, tabldef: %p, conv_table: %p", table_found, tabledef, conv_table)); DBUG_ASSERT(table_found); /* - If rli is NULL it means that there is no source table and that the + If rgi is NULL it means that there is no source table and that the row shall just be unpacked without doing any checks. This feature is used by MySQL Backup, but can be used for other purposes as well. 
*/ - if (rli && !table_found) + if (rgi && !table_found) DBUG_RETURN(HA_ERR_GENERIC); for (field_ptr= begin_ptr ; field_ptr < end_ptr && *field_ptr ; ++field_ptr) @@ -313,7 +313,7 @@ unpack_row(Relay_log_info const *rli, (int) (pack_ptr - old_pack_ptr))); if (!pack_ptr) { - rli->report(ERROR_LEVEL, ER_SLAVE_CORRUPT_EVENT, + rgi->rli->report(ERROR_LEVEL, ER_SLAVE_CORRUPT_EVENT, "Could not read field '%s' of table '%s.%s'", f->field_name, table->s->db.str, table->s->table_name.str); diff --git a/sql/rpl_record.h b/sql/rpl_record.h index 4b34dcd0a96..7369edf1379 100644 --- a/sql/rpl_record.h +++ b/sql/rpl_record.h @@ -21,7 +21,7 @@ #include #include "my_global.h" /* uchar */ -class Relay_log_info; +class rpl_group_info; struct TABLE; typedef struct st_bitmap MY_BITMAP; @@ -31,7 +31,7 @@ size_t pack_row(TABLE* table, MY_BITMAP const* cols, #endif #if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) -int unpack_row(Relay_log_info const *rli, +int unpack_row(rpl_group_info *rgi, TABLE *table, uint const colcnt, uchar const *const row_data, uchar const *row_buffer_end, MY_BITMAP const *cols, diff --git a/sql/rpl_record_old.cc b/sql/rpl_record_old.cc index fa0c49b413c..5afa529a63c 100644 --- a/sql/rpl_record_old.cc +++ b/sql/rpl_record_old.cc @@ -88,7 +88,7 @@ pack_row_old(TABLE *table, MY_BITMAP const* cols, */ #if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) int -unpack_row_old(Relay_log_info *rli, +unpack_row_old(rpl_group_info *rgi, TABLE *table, uint const colcnt, uchar *record, uchar const *row, const uchar *row_buffer_end, MY_BITMAP const *cols, @@ -141,7 +141,7 @@ unpack_row_old(Relay_log_info *rli, f->move_field_offset(-offset); if (!ptr) { - rli->report(ERROR_LEVEL, ER_SLAVE_CORRUPT_EVENT, + rgi->rli->report(ERROR_LEVEL, ER_SLAVE_CORRUPT_EVENT, "Could not read field `%s` of table `%s`.`%s`", f->field_name, table->s->db.str, table->s->table_name.str); @@ -183,7 +183,7 @@ unpack_row_old(Relay_log_info *rli, if (event_type == WRITE_ROWS_EVENT && 
((*field_ptr)->flags & mask) == mask) { - rli->report(ERROR_LEVEL, ER_NO_DEFAULT_FOR_FIELD, + rgi->rli->report(ERROR_LEVEL, ER_NO_DEFAULT_FOR_FIELD, "Field `%s` of table `%s`.`%s` " "has no default value and cannot be NULL", (*field_ptr)->field_name, table->s->db.str, diff --git a/sql/rpl_record_old.h b/sql/rpl_record_old.h index ea981fb23c3..34ef9f11c47 100644 --- a/sql/rpl_record_old.h +++ b/sql/rpl_record_old.h @@ -23,7 +23,7 @@ size_t pack_row_old(TABLE *table, MY_BITMAP const* cols, uchar *row_data, const uchar *record); #ifdef HAVE_REPLICATION -int unpack_row_old(Relay_log_info *rli, +int unpack_row_old(rpl_group_info *rgi, TABLE *table, uint const colcnt, uchar *record, uchar const *row, uchar const *row_buffer_end, MY_BITMAP const *cols, diff --git a/sql/rpl_rli.cc b/sql/rpl_rli.cc index 49547718230..c4b898f74e3 100644 --- a/sql/rpl_rli.cc +++ b/sql/rpl_rli.cc @@ -59,7 +59,6 @@ Relay_log_info::Relay_log_info(bool is_slave_recovery) abort_pos_wait(0), slave_run_id(0), sql_thd(0), inited(0), abort_slave(0), slave_running(0), until_condition(UNTIL_NONE), until_log_pos(0), retried_trans(0), executed_entries(0), - tables_to_lock(0), tables_to_lock_count(0), last_event_start_time(0), m_flags(0), row_stmt_start_timestamp(0), long_find_row_note_printed(false) { @@ -135,8 +134,6 @@ int init_relay_log_info(Relay_log_info* rli, rli->abort_pos_wait=0; rli->log_space_limit= relay_log_space_limit; rli->log_space_total= 0; - rli->tables_to_lock= 0; - rli->tables_to_lock_count= 0; char pattern[FN_REFLEN]; (void) my_realpath(pattern, slave_load_tmpdir, 0); @@ -1261,129 +1258,6 @@ void Relay_log_info::stmt_done(my_off_t event_master_log_pos, } #if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) -void Relay_log_info::cleanup_context(THD *thd, bool error) -{ - DBUG_ENTER("Relay_log_info::cleanup_context"); - - /* - In parallel replication, different THDs can be used from different - parallel threads. 
But in single-threaded mode, only the THD of the main - SQL thread is allowed. - */ - DBUG_ASSERT(opt_slave_parallel_threads > 0 || sql_thd == thd); - /* - 1) Instances of Table_map_log_event, if ::do_apply_event() was called on them, - may have opened tables, which we cannot be sure have been closed (because - maybe the Rows_log_event have not been found or will not be, because slave - SQL thread is stopping, or relay log has a missing tail etc). So we close - all thread's tables. And so the table mappings have to be cancelled. - 2) Rows_log_event::do_apply_event() may even have started statements or - transactions on them, which we need to rollback in case of error. - 3) If finding a Format_description_log_event after a BEGIN, we also need - to rollback before continuing with the next events. - 4) so we need this "context cleanup" function. - */ - if (error) - { - trans_rollback_stmt(thd); // if a "statement transaction" - trans_rollback(thd); // if a "real transaction" - } - m_table_map.clear_tables(); - slave_close_thread_tables(thd); - if (error) - thd->mdl_context.release_transactional_locks(); - clear_flag(IN_STMT); - /* - Cleanup for the flags that have been set at do_apply_event. - */ - thd->variables.option_bits&= ~OPTION_NO_FOREIGN_KEY_CHECKS; - thd->variables.option_bits&= ~OPTION_RELAXED_UNIQUE_CHECKS; - - /* - Reset state related to long_find_row notes in the error log: - - timestamp - - flag that decides whether the slave prints or not - */ - reset_row_stmt_start_timestamp(); - unset_long_find_row_note_printed(); - - DBUG_VOID_RETURN; -} - -void Relay_log_info::clear_tables_to_lock() -{ - DBUG_ENTER("Relay_log_info::clear_tables_to_lock()"); -#ifndef DBUG_OFF - /** - When replicating in RBR and MyISAM Merge tables are involved - open_and_lock_tables (called in do_apply_event) appends the - base tables to the list of tables_to_lock. Then these are - removed from the list in close_thread_tables (which is called - before we reach this point). 
- - This assertion just confirms that we get no surprises at this - point. - */ - uint i=0; - for (TABLE_LIST *ptr= tables_to_lock ; ptr ; ptr= ptr->next_global, i++) ; - DBUG_ASSERT(i == tables_to_lock_count); -#endif - - while (tables_to_lock) - { - uchar* to_free= reinterpret_cast<uchar*>(tables_to_lock); - if (tables_to_lock->m_tabledef_valid) - { - tables_to_lock->m_tabledef.table_def::~table_def(); - tables_to_lock->m_tabledef_valid= FALSE; - } - - /* - If blob fields were used during conversion of field values - from the master table into the slave table, then we need to - free the memory used temporarily to store their values before - copying into the slave's table. - */ - if (tables_to_lock->m_conv_table) - free_blobs(tables_to_lock->m_conv_table); - - tables_to_lock= - static_cast<RPL_TABLE_LIST*>(tables_to_lock->next_global); - tables_to_lock_count--; - my_free(to_free); - } - DBUG_ASSERT(tables_to_lock == NULL && tables_to_lock_count == 0); - DBUG_VOID_RETURN; -} - -void Relay_log_info::slave_close_thread_tables(THD *thd) -{ - DBUG_ENTER("Relay_log_info::slave_close_thread_tables(THD *thd)"); - thd->stmt_da->can_overwrite_status= TRUE; - thd->is_error() ? trans_rollback_stmt(thd) : trans_commit_stmt(thd); - thd->stmt_da->can_overwrite_status= FALSE; - - close_thread_tables(thd); - /* - - If inside a multi-statement transaction, - defer the release of metadata locks until the current - transaction is either committed or rolled back. This prevents - other statements from modifying the table for the entire - duration of this transaction. This provides commit ordering - and guarantees serializability across multiple transactions. - - If in autocommit mode, or outside a transactional context, - automatically release metadata locks of the current statement. - */ - if (!
thd->in_multi_stmt_transaction_mode()) - thd->mdl_context.release_transactional_locks(); - else - thd->mdl_context.release_statement_locks(); - - clear_tables_to_lock(); - DBUG_VOID_RETURN; -} - - int rpl_load_gtid_slave_state(THD *thd) { @@ -1539,7 +1413,8 @@ end: rpl_group_info::rpl_group_info(Relay_log_info *rli_) : rli(rli_), thd(0), gtid_sub_id(0), wait_commit_sub_id(0), wait_commit_group_info(0), wait_start_sub_id(0), parallel_entry(0), - deferred_events(NULL), m_annotate_event(0) + deferred_events(NULL), m_annotate_event(0), tables_to_lock(0), + tables_to_lock_count(0) { bzero(&current_gtid, sizeof(current_gtid)); } @@ -1613,4 +1488,126 @@ delete_or_keep_event_post_apply(rpl_group_info *rgi, } } + +void rpl_group_info::cleanup_context(THD *thd, bool error) +{ + DBUG_ENTER("Relay_log_info::cleanup_context"); + + DBUG_ASSERT(this->thd == thd); + /* + 1) Instances of Table_map_log_event, if ::do_apply_event() was called on them, + may have opened tables, which we cannot be sure have been closed (because + maybe the Rows_log_event have not been found or will not be, because slave + SQL thread is stopping, or relay log has a missing tail etc). So we close + all thread's tables. And so the table mappings have to be cancelled. + 2) Rows_log_event::do_apply_event() may even have started statements or + transactions on them, which we need to rollback in case of error. + 3) If finding a Format_description_log_event after a BEGIN, we also need + to rollback before continuing with the next events. + 4) so we need this "context cleanup" function. + */ + if (error) + { + trans_rollback_stmt(thd); // if a "statement transaction" + trans_rollback(thd); // if a "real transaction" + } + m_table_map.clear_tables(); + slave_close_thread_tables(thd); + if (error) + thd->mdl_context.release_transactional_locks(); + /* ToDo: This must clear the flag in rgi, not rli. */ + rli->clear_flag(Relay_log_info::IN_STMT); + /* + Cleanup for the flags that have been set at do_apply_event.
+ */ + thd->variables.option_bits&= ~OPTION_NO_FOREIGN_KEY_CHECKS; + thd->variables.option_bits&= ~OPTION_RELAXED_UNIQUE_CHECKS; + + /* + Reset state related to long_find_row notes in the error log: + - timestamp + - flag that decides whether the slave prints or not + */ + rli->reset_row_stmt_start_timestamp(); + rli->unset_long_find_row_note_printed(); + + DBUG_VOID_RETURN; +} + + +void rpl_group_info::clear_tables_to_lock() +{ + DBUG_ENTER("Relay_log_info::clear_tables_to_lock()"); +#ifndef DBUG_OFF + /** + When replicating in RBR and MyISAM Merge tables are involved + open_and_lock_tables (called in do_apply_event) appends the + base tables to the list of tables_to_lock. Then these are + removed from the list in close_thread_tables (which is called + before we reach this point). + + This assertion just confirms that we get no surprises at this + point. + */ + uint i=0; + for (TABLE_LIST *ptr= tables_to_lock ; ptr ; ptr= ptr->next_global, i++) ; + DBUG_ASSERT(i == tables_to_lock_count); +#endif + + while (tables_to_lock) + { + uchar* to_free= reinterpret_cast<uchar*>(tables_to_lock); + if (tables_to_lock->m_tabledef_valid) + { + tables_to_lock->m_tabledef.table_def::~table_def(); + tables_to_lock->m_tabledef_valid= FALSE; + } + + /* + If blob fields were used during conversion of field values + from the master table into the slave table, then we need to + free the memory used temporarily to store their values before + copying into the slave's table. + */ + if (tables_to_lock->m_conv_table) + free_blobs(tables_to_lock->m_conv_table); + + tables_to_lock= + static_cast<RPL_TABLE_LIST*>(tables_to_lock->next_global); + tables_to_lock_count--; + my_free(to_free); + } + DBUG_ASSERT(tables_to_lock == NULL && tables_to_lock_count == 0); + DBUG_VOID_RETURN; +} + + +void rpl_group_info::slave_close_thread_tables(THD *thd) +{ + DBUG_ENTER("Relay_log_info::slave_close_thread_tables(THD *thd)"); + thd->stmt_da->can_overwrite_status= TRUE; + thd->is_error() ?
trans_rollback_stmt(thd) : trans_commit_stmt(thd); + thd->stmt_da->can_overwrite_status= FALSE; + + close_thread_tables(thd); + /* + - If inside a multi-statement transaction, + defer the release of metadata locks until the current + transaction is either committed or rolled back. This prevents + other statements from modifying the table for the entire + duration of this transaction. This provides commit ordering + and guarantees serializability across multiple transactions. + - If in autocommit mode, or outside a transactional context, + automatically release metadata locks of the current statement. + */ + if (! thd->in_multi_stmt_transaction_mode()) + thd->mdl_context.release_transactional_locks(); + else + thd->mdl_context.release_statement_locks(); + + clear_tables_to_lock(); + DBUG_VOID_RETURN; +} + + #endif diff --git a/sql/rpl_rli.h b/sql/rpl_rli.h index 4d954d1c8aa..10181cc6fab 100644 --- a/sql/rpl_rli.h +++ b/sql/rpl_rli.h @@ -361,27 +361,6 @@ public: group_relay_log_pos); } - RPL_TABLE_LIST *tables_to_lock; /* RBR: Tables to lock */ - uint tables_to_lock_count; /* RBR: Count of tables to lock */ - table_mapping m_table_map; /* RBR: Mapping table-id to table */ - - bool get_table_data(TABLE *table_arg, table_def **tabledef_var, TABLE **conv_table_var) const - { - DBUG_ASSERT(tabledef_var && conv_table_var); - for (TABLE_LIST *ptr= tables_to_lock ; ptr != NULL ; ptr= ptr->next_global) - if (ptr->table == table_arg) - { - *tabledef_var= &static_cast(ptr)->m_tabledef; - *conv_table_var= static_cast(ptr)->m_conv_table; - DBUG_PRINT("debug", ("Fetching table data for table %s.%s:" - " tabledef: %p, conv_table: %p", - table_arg->s->db.str, table_arg->s->table_name.str, - *tabledef_var, *conv_table_var)); - return true; - } - return false; - } - /* Last charset (6 bytes) seen by slave SQL thread is cached here; it helps the thread save 3 get_charset() per Query_log_event if the charset is not @@ -391,10 +370,6 @@ public: void cached_charset_invalidate(); bool 
cached_charset_compare(char *charset) const; - void cleanup_context(THD *, bool); - void slave_close_thread_tables(THD *); - void clear_tables_to_lock(); - /* Used to defer stopping the SQL thread to give it a chance to finish up the current group of events. @@ -588,6 +563,10 @@ struct rpl_group_info Annotate_rows_log_event *m_annotate_event; + RPL_TABLE_LIST *tables_to_lock; /* RBR: Tables to lock */ + uint tables_to_lock_count; /* RBR: Count of tables to lock */ + table_mapping m_table_map; /* RBR: Mapping table-id to table */ + rpl_group_info(Relay_log_info *rli_); ~rpl_group_info(); @@ -649,6 +628,26 @@ struct rpl_group_info } } + bool get_table_data(TABLE *table_arg, table_def **tabledef_var, TABLE **conv_table_var) const + { + DBUG_ASSERT(tabledef_var && conv_table_var); + for (TABLE_LIST *ptr= tables_to_lock ; ptr != NULL ; ptr= ptr->next_global) + if (ptr->table == table_arg) + { + *tabledef_var= &static_cast(ptr)->m_tabledef; + *conv_table_var= static_cast(ptr)->m_conv_table; + DBUG_PRINT("debug", ("Fetching table data for table %s.%s:" + " tabledef: %p, conv_table: %p", + table_arg->s->db.str, table_arg->s->table_name.str, + *tabledef_var, *conv_table_var)); + return true; + } + return false; + } + + void clear_tables_to_lock(); + void cleanup_context(THD *, bool); + void slave_close_thread_tables(THD *); }; diff --git a/sql/slave.cc b/sql/slave.cc index e0cc595213d..c807561b0b3 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -3307,7 +3307,7 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli, else { exec_res= 0; - rli->cleanup_context(thd, 1); + serial_rgi->cleanup_context(thd, 1); /* chance for concurrent connection to get more locks */ slave_sleep(thd, min(rli->trans_retries, MAX_SLAVE_RETRY_PAUSE), sql_slave_killed, rli); @@ -3983,7 +3983,7 @@ pthread_handler_t handle_slave_sql(void *arg) Master_info *mi= ((Master_info*)arg); Relay_log_info* rli = &mi->rli; const char *errmsg; - rpl_group_info serial_rgi(rli); + rpl_group_info 
*serial_rgi; // needs to call my_thread_init(), otherwise we get a coredump in DBUG_ stuff my_thread_init(); @@ -3992,10 +3992,11 @@ pthread_handler_t handle_slave_sql(void *arg) LINT_INIT(saved_master_log_pos); LINT_INIT(saved_log_pos); + serial_rgi= new rpl_group_info(rli); thd = new THD; // note that contructor of THD uses DBUG_ ! thd->thread_stack = (char*)&thd; // remember where our stack is thd->rpl_filter = mi->rpl_filter; - serial_rgi.thd= thd; + serial_rgi->thd= thd; DBUG_ASSERT(rli->inited); DBUG_ASSERT(rli->mi == mi); @@ -4025,10 +4026,10 @@ pthread_handler_t handle_slave_sql(void *arg) goto err_during_init; } thd->init_for_queries(); - thd->rgi_slave= &serial_rgi; - if ((serial_rgi.deferred_events_collecting= mi->rpl_filter->is_on())) + thd->rgi_slave= serial_rgi; + if ((serial_rgi->deferred_events_collecting= mi->rpl_filter->is_on())) { - serial_rgi.deferred_events= new Deferred_log_events(rli); + serial_rgi->deferred_events= new Deferred_log_events(rli); } thd->temporary_tables = rli->save_temporary_tables; // restore temp tables @@ -4211,7 +4212,7 @@ log '%s' at position %s, relay log '%s' position: %s%s", RPL_LOG_NAME, saved_skip= 0; } - if (exec_relay_log_event(thd, rli, &serial_rgi)) + if (exec_relay_log_event(thd, rli, serial_rgi)) { DBUG_PRINT("info", ("exec_relay_log_event() failed")); // do not scare the user if SQL thread was simply killed or stopped @@ -4338,7 +4339,7 @@ the slave SQL thread with \"SLAVE START\". 
We stopped at log \ must "proactively" clear playgrounds: */ thd->clear_error(); - rli->cleanup_context(thd, 1); + serial_rgi->cleanup_context(thd, 1); /* Some extra safety, which should not been needed (normally, event deletion should already have done these assignments (each event which sets these @@ -4379,6 +4380,7 @@ err_during_init: mysql_mutex_lock(&LOCK_thread_count); THD_CHECK_SENTRY(thd); delete thd; + delete serial_rgi; mysql_mutex_unlock(&LOCK_thread_count); /* Note: the order of the broadcast and unlock calls below (first broadcast, then unlock) diff --git a/sql/sql_binlog.cc b/sql/sql_binlog.cc index 04cb4adcb2c..4f41b942345 100644 --- a/sql/sql_binlog.cc +++ b/sql/sql_binlog.cc @@ -273,7 +273,7 @@ void mysql_client_binlog_statement(THD* thd) end: thd->variables.option_bits= thd_options; - rli->slave_close_thread_tables(thd); + rgi->slave_close_thread_tables(thd); my_free(buf); DBUG_VOID_RETURN; } From 7781cdb79bccef16e800d67eeade031e65967e82 Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 17 Sep 2013 11:33:29 +0200 Subject: [PATCH 18/41] MDEV-4506: parallel replication. Add comments explaining tricky memory barrier semantics and suggestions for future changes. --- sql/log.cc | 12 ++++++++++++ sql/sql_class.cc | 13 +++++++++++++ 2 files changed, 25 insertions(+) diff --git a/sql/log.cc b/sql/log.cc index 763eb4177ea..ccedaa90ec6 100644 --- a/sql/log.cc +++ b/sql/log.cc @@ -6757,6 +6757,18 @@ MYSQL_BIN_LOG::queue_for_group_commit(group_commit_entry *entry) { if (list->wakeup_subsequent_commits_running) { + /* + ToDo: We should not need a full lock/unlock of LOCK_wait_commit + here. All we need is a single (full) memory barrier, to ensure that + the reads of the list above are not reordered with the write of + wakeup_subsequent_commits_running, or with the writes to the list + from other threads that are allowed to happen after + wakeup_subsequent_commits_running has been set to false.
+ + We do not currently have explicit memory barrier primitives in the + source tree, but if we get them the below mysql_mutex_lock() could + be replaced with a full memory barrier just before the loop. + */ mysql_mutex_lock(&list->LOCK_wait_commit); list->wakeup_subsequent_commits_running= false; mysql_mutex_unlock(&list->LOCK_wait_commit); diff --git a/sql/sql_class.cc b/sql/sql_class.cc index 66b28c87ac9..9a638947257 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -5754,6 +5754,19 @@ wait_for_commit::wakeup_subsequent_commits2() waiter= next; } + /* + ToDo: We should not need a full lock/unlock of LOCK_wait_commit here. All + we need is a (full) memory barrier, to ensure that the reads of the list + above are not reordered with the write of + wakeup_subsequent_commits_running, or with the writes to the list from + other threads that is allowed to happen after + wakeup_subsequent_commits_running has been set to false. + + We do not currently have explicit memory barrier primitives in the source + tree, but if we get them the below mysql_mutex_lock() could be replaced + with a full memory barrier. It is probably not important, the lock is not + contented and will likely be in the CPU cache since we took it just before. + */ mysql_mutex_lock(&LOCK_wait_commit); wakeup_subsequent_commits_running= false; mysql_mutex_unlock(&LOCK_wait_commit); From 39794dc72c2eb2beb5838a47c87c82b4bb461aba Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 17 Sep 2013 14:07:21 +0200 Subject: [PATCH 19/41] MDEV-4506: Parallel replication. Add another test case, using DEBUG_SYNC. Fix one bug found. 
--- mysql-test/include/show_events.inc | 2 +- mysql-test/suite/rpl/r/rpl_parallel.result | 76 +++++++++++++++++ mysql-test/suite/rpl/t/rpl_parallel.test | 94 ++++++++++++++++++++++ sql/log.cc | 1 + sql/rpl_parallel.cc | 8 ++ sql/rpl_parallel.h | 1 + sql/slave.cc | 1 + 7 files changed, 182 insertions(+), 1 deletion(-) diff --git a/mysql-test/include/show_events.inc b/mysql-test/include/show_events.inc index eeae503ce5e..d249352823e 100644 --- a/mysql-test/include/show_events.inc +++ b/mysql-test/include/show_events.inc @@ -63,5 +63,5 @@ if (`SELECT '$binlog_limit' <> ''`) --replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR $_binlog_start --replace_column 2 # 4 # 5 # ---replace_regex /\/\* xid=.* \*\//\/* XID *\// /table_id: [0-9]+/table_id: #/ /file_id=[0-9]+/file_id=#/ /block_len=[0-9]+/block_len=#/ /Server ver:.*$/SERVER_VERSION, BINLOG_VERSION/ /GTID [0-9]+-[0-9]+-[0-9]+/GTID #-#-#/ /\[([0-9]-[0-9]-[0-9]+)\]/[#-#-#]/ +--replace_regex /\/\* xid=.* \*\//\/* XID *\// /table_id: [0-9]+/table_id: #/ /file_id=[0-9]+/file_id=#/ /block_len=[0-9]+/block_len=#/ /Server ver:.*$/SERVER_VERSION, BINLOG_VERSION/ /GTID [0-9]+-[0-9]+-[0-9]+/GTID #-#-#/ /\[([0-9]-[0-9]-[0-9]+)\]/[#-#-#]/ /cid=[0-9]+/cid=#/ --eval $_statement diff --git a/mysql-test/suite/rpl/r/rpl_parallel.result b/mysql-test/suite/rpl/r/rpl_parallel.result index e60b9406b8e..41e891cf33b 100644 --- a/mysql-test/suite/rpl/r/rpl_parallel.result +++ b/mysql-test/suite/rpl/r/rpl_parallel.result @@ -38,8 +38,84 @@ SELECT * FROM t1 ORDER BY a; a 1 2 +*** Test two transactions in different domains committed in opposite order on slave but in a single group commit. 
*** +include/stop_slave.inc +SET sql_log_bin=0; +CREATE FUNCTION foo(x INT, d1 VARCHAR(500), d2 VARCHAR(500)) +RETURNS INT DETERMINISTIC +BEGIN +RETURN x; +END +|| +SET sql_log_bin=1; +SET @old_format= @@SESSION.binlog_format; +SET binlog_format='statement'; +SET gtid_domain_id=1; +INSERT INTO t2 VALUES (foo(10, +'commit_before_enqueue SIGNAL ready1 WAIT_FOR cont1', +'commit_after_release_LOCK_prepare_ordered SIGNAL ready2')); +FLUSH LOGS; +SET sql_log_bin=0; +CREATE FUNCTION foo(x INT, d1 VARCHAR(500), d2 VARCHAR(500)) +RETURNS INT DETERMINISTIC +BEGIN +IF d1 != '' THEN +SET debug_sync = d1; +END IF; +IF d2 != '' THEN +SET debug_sync = d2; +END IF; +RETURN x; +END +|| +SET sql_log_bin=1; +SET @old_format=@@GLOBAL.binlog_format; +SET GLOBAL binlog_format=statement; +SET GLOBAL slave_parallel_threads=0; +SET GLOBAL slave_parallel_threads=10; +include/start_slave.inc +SET debug_sync='now WAIT_FOR ready1'; +SET gtid_domain_id=2; +INSERT INTO t2 VALUES (foo(11, +'commit_before_enqueue SIGNAL ready3 WAIT_FOR cont3', +'commit_after_release_LOCK_prepare_ordered SIGNAL ready4 WAIT_FOR cont4')); +SET gtid_domain_id=0; +SET binlog_format=@old_format; +SELECT * FROM t2 WHERE a >= 10 ORDER BY a; +a +10 +11 +SET debug_sync='now WAIT_FOR ready3'; +SET debug_sync='now SIGNAL cont3'; +SET debug_sync='now WAIT_FOR ready4'; +SET debug_sync='now SIGNAL cont1'; +SET debug_sync='now WAIT_FOR ready2'; +SET debug_sync='now SIGNAL cont4'; +SELECT * FROM t2 WHERE a >= 10 ORDER BY a; +a +10 +11 +show binlog events in 'slave-bin.000002' from ; +Log_name Pos Event_type Server_id End_log_pos Info +slave-bin.000002 # Binlog_checkpoint # # slave-bin.000002 +slave-bin.000002 # Gtid # # BEGIN GTID #-#-# cid=# +slave-bin.000002 # Query # # use `test`; INSERT INTO t2 VALUES (foo(11, +'commit_before_enqueue SIGNAL ready3 WAIT_FOR cont3', +'commit_after_release_LOCK_prepare_ordered SIGNAL ready4 WAIT_FOR cont4')) +slave-bin.000002 # Xid # # COMMIT /* XID */ +slave-bin.000002 # Gtid # # BEGIN GTID 
#-#-# cid=# +slave-bin.000002 # Query # # use `test`; INSERT INTO t2 VALUES (foo(10, +'commit_before_enqueue SIGNAL ready1 WAIT_FOR cont1', +'commit_after_release_LOCK_prepare_ordered SIGNAL ready2')) +slave-bin.000002 # Xid # # COMMIT /* XID */ +include/stop_slave.inc +SET GLOBAL binlog_format=@old_format; +SET GLOBAL slave_parallel_threads=0; +SET GLOBAL slave_parallel_threads=10; +include/start_slave.inc include/stop_slave.inc SET GLOBAL slave_parallel_threads=@old_parallel_threads; include/start_slave.inc +DROP function foo; DROP TABLE t1,t2; include/rpl_end.inc diff --git a/mysql-test/suite/rpl/t/rpl_parallel.test b/mysql-test/suite/rpl/t/rpl_parallel.test index b9ba88489e4..6054263e4fd 100644 --- a/mysql-test/suite/rpl/t/rpl_parallel.test +++ b/mysql-test/suite/rpl/t/rpl_parallel.test @@ -63,12 +63,106 @@ UNLOCK TABLES; SELECT * FROM t1 ORDER BY a; + +--echo *** Test two transactions in different domains committed in opposite order on slave but in a single group commit. *** +--connection server_2 +--source include/stop_slave.inc + +--connection server_1 +# Use a stored function to inject a debug_sync into the appropriate THD. +# The function does nothing on the master, and on the slave it injects the +# desired debug_sync action(s). 
+SET sql_log_bin=0; +--delimiter || +CREATE FUNCTION foo(x INT, d1 VARCHAR(500), d2 VARCHAR(500)) + RETURNS INT DETERMINISTIC + BEGIN + RETURN x; + END +|| +--delimiter ; +SET sql_log_bin=1; + +SET @old_format= @@SESSION.binlog_format; +SET binlog_format='statement'; +SET gtid_domain_id=1; +INSERT INTO t2 VALUES (foo(10, + 'commit_before_enqueue SIGNAL ready1 WAIT_FOR cont1', + 'commit_after_release_LOCK_prepare_ordered SIGNAL ready2')); + +--connection server_2 +FLUSH LOGS; +SET sql_log_bin=0; +--delimiter || +CREATE FUNCTION foo(x INT, d1 VARCHAR(500), d2 VARCHAR(500)) + RETURNS INT DETERMINISTIC + BEGIN + IF d1 != '' THEN + SET debug_sync = d1; + END IF; + IF d2 != '' THEN + SET debug_sync = d2; + END IF; + RETURN x; + END +|| +--delimiter ; +SET sql_log_bin=1; +SET @old_format=@@GLOBAL.binlog_format; +SET GLOBAL binlog_format=statement; +# We need to restart all parallel threads for the new global setting to +# be copied to the session-level values. +SET GLOBAL slave_parallel_threads=0; +SET GLOBAL slave_parallel_threads=10; +--source include/start_slave.inc + +# First make sure the first insert is ready to commit, but not queued yet. +SET debug_sync='now WAIT_FOR ready1'; + +--connection server_1 +SET gtid_domain_id=2; +INSERT INTO t2 VALUES (foo(11, + 'commit_before_enqueue SIGNAL ready3 WAIT_FOR cont3', + 'commit_after_release_LOCK_prepare_ordered SIGNAL ready4 WAIT_FOR cont4')); +SET gtid_domain_id=0; +SET binlog_format=@old_format; +SELECT * FROM t2 WHERE a >= 10 ORDER BY a; + +--connection server_2 +# Now wait for the second insert to queue itself as the leader, and then +# wait for more commits to queue up. +SET debug_sync='now WAIT_FOR ready3'; +SET debug_sync='now SIGNAL cont3'; +SET debug_sync='now WAIT_FOR ready4'; +# Now allow the first insert to queue up to participate in group commit. +SET debug_sync='now SIGNAL cont1'; +SET debug_sync='now WAIT_FOR ready2'; +# Finally allow the second insert to proceed and do the group commit. 
+SET debug_sync='now SIGNAL cont4'; + +--let $wait_condition= SELECT COUNT(*) = 2 FROM t2 WHERE a >= 10 +--source include/wait_condition.inc +SELECT * FROM t2 WHERE a >= 10 ORDER BY a; +# The two INSERT transactions should have been committed in opposite order, +# but in the same group commit (seen by presence of cid=# in the SHOW +# BINLOG output). +--let $binlog_file= slave-bin.000002 +--source include/show_binlog_events.inc + +--source include/stop_slave.inc +SET GLOBAL binlog_format=@old_format; +SET GLOBAL slave_parallel_threads=0; +SET GLOBAL slave_parallel_threads=10; +--source include/start_slave.inc + + --connection server_2 --source include/stop_slave.inc SET GLOBAL slave_parallel_threads=@old_parallel_threads; --source include/start_slave.inc --connection server_1 +DROP function foo; DROP TABLE t1,t2; --source include/rpl_end.inc diff --git a/sql/log.cc b/sql/log.cc index ccedaa90ec6..4b5826f0097 100644 --- a/sql/log.cc +++ b/sql/log.cc @@ -6623,6 +6623,7 @@ MYSQL_BIN_LOG::queue_for_group_commit(group_commit_entry *entry) return false; /* Now enqueue ourselves in the group commit queue.
*/ + DEBUG_SYNC(entry->thd, "commit_before_enqueue"); entry->thd->clear_wakeup_ready(); mysql_mutex_lock(&LOCK_prepare_ordered); orig_queue= group_commit_queue; diff --git a/sql/rpl_parallel.cc b/sql/rpl_parallel.cc index b4c1f6c941a..def0fe7c756 100644 --- a/sql/rpl_parallel.cc +++ b/sql/rpl_parallel.cc @@ -507,6 +507,14 @@ rpl_parallel::rpl_parallel() : } +void +rpl_parallel::reset() +{ + my_hash_reset(&domain_hash); + current= NULL; +} + + rpl_parallel::~rpl_parallel() { my_hash_free(&domain_hash); diff --git a/sql/rpl_parallel.h b/sql/rpl_parallel.h index adbb1a18526..d30540c3d03 100644 --- a/sql/rpl_parallel.h +++ b/sql/rpl_parallel.h @@ -76,6 +76,7 @@ struct rpl_parallel { rpl_parallel(); ~rpl_parallel(); + void reset(); rpl_parallel_entry *find(uint32 domain_id); void wait_for_done(); bool do_event(rpl_group_info *serial_rgi, Log_event *ev); diff --git a/sql/slave.cc b/sql/slave.cc index c807561b0b3..cd4e4254dbc 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -4066,6 +4066,7 @@ pthread_handler_t handle_slave_sql(void *arg) But the master timestamp is reset by RESET SLAVE & CHANGE MASTER. */ rli->clear_error(); + rli->parallel.reset(); //tell the I/O thread to take relay_log_space_limit into account from now on mysql_mutex_lock(&rli->log_space_lock); From 36df41ed95444e6a733e88d62daed0b281d012b2 Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 19 Sep 2013 12:45:59 +0200 Subject: [PATCH 20/41] MDEV-4506: Parallel replication. Add another test case. 
--- mysql-test/suite/rpl/r/rpl_parallel.result | 107 +++++++++++++++++- mysql-test/suite/rpl/t/rpl_parallel.test | 124 ++++++++++++++++++++- sql/log.cc | 29 +++-- 3 files changed, 243 insertions(+), 17 deletions(-) diff --git a/mysql-test/suite/rpl/r/rpl_parallel.result b/mysql-test/suite/rpl/r/rpl_parallel.result index 41e891cf33b..2e13d914e62 100644 --- a/mysql-test/suite/rpl/r/rpl_parallel.result +++ b/mysql-test/suite/rpl/r/rpl_parallel.result @@ -80,7 +80,6 @@ INSERT INTO t2 VALUES (foo(11, 'commit_before_enqueue SIGNAL ready3 WAIT_FOR cont3', 'commit_after_release_LOCK_prepare_ordered SIGNAL ready4 WAIT_FOR cont4')); SET gtid_domain_id=0; -SET binlog_format=@old_format; SELECT * FROM t2 WHERE a >= 10 ORDER BY a; a 10 @@ -108,6 +107,110 @@ slave-bin.000002 # Query # # use `test`; INSERT INTO t2 VALUES (foo(10, 'commit_before_enqueue SIGNAL ready1 WAIT_FOR cont1', 'commit_after_release_LOCK_prepare_ordered SIGNAL ready2')) slave-bin.000002 # Xid # # COMMIT /* XID */ +FLUSH LOGS; +include/stop_slave.inc +SET GLOBAL slave_parallel_threads=0; +SET GLOBAL slave_parallel_threads=10; +SET debug_sync='RESET'; +include/start_slave.inc +*** Test that group-committed transactions on the master can replicate in parallel on the slave. 
*** +FLUSH LOGS; +CREATE TABLE t3 (a INT PRIMARY KEY, b INT) ENGINE=InnoDB; +INSERT INTO t3 VALUES (1,1), (3,3), (5,5), (7,7); +SET binlog_format=@old_format; +BEGIN; +INSERT INTO t3 VALUES (2,102); +BEGIN; +INSERT INTO t3 VALUES (4,104); +SET debug_sync='commit_after_release_LOCK_prepare_ordered SIGNAL master_queued1 WAIT_FOR master_cont1'; +SET binlog_format=statement; +INSERT INTO t3 VALUES (2, foo(12, +'commit_after_release_LOCK_prepare_ordered SIGNAL slave_queued1 WAIT_FOR slave_cont1', +'')); +SET debug_sync='now WAIT_FOR master_queued1'; +SET debug_sync='commit_after_release_LOCK_prepare_ordered SIGNAL master_queued2'; +SET binlog_format=statement; +INSERT INTO t3 VALUES (4, foo(14, +'commit_after_release_LOCK_prepare_ordered SIGNAL slave_queued2', +'')); +SET debug_sync='now WAIT_FOR master_queued2'; +SET debug_sync='commit_after_release_LOCK_prepare_ordered SIGNAL master_queued3'; +SET binlog_format=statement; +INSERT INTO t3 VALUES (6, foo(16, +'group_commit_waiting_for_prior SIGNAL slave_queued3', +'')); +SET debug_sync='now WAIT_FOR master_queued3'; +SET debug_sync='now SIGNAL master_cont1'; +SELECT * FROM t3 ORDER BY a; +a b +1 1 +2 12 +3 3 +4 14 +5 5 +6 16 +7 7 +show binlog events in 'master-bin.000002' from ; +Log_name Pos Event_type Server_id End_log_pos Info +master-bin.000002 # Binlog_checkpoint # # master-bin.000002 +master-bin.000002 # Gtid # # GTID #-#-# +master-bin.000002 # Query # # use `test`; CREATE TABLE t3 (a INT PRIMARY KEY, b INT) ENGINE=InnoDB +master-bin.000002 # Gtid # # BEGIN GTID #-#-# +master-bin.000002 # Query # # use `test`; INSERT INTO t3 VALUES (1,1), (3,3), (5,5), (7,7) +master-bin.000002 # Xid # # COMMIT /* XID */ +master-bin.000002 # Gtid # # BEGIN GTID #-#-# cid=# +master-bin.000002 # Query # # use `test`; INSERT INTO t3 VALUES (2, foo(12, +'commit_after_release_LOCK_prepare_ordered SIGNAL slave_queued1 WAIT_FOR slave_cont1', +'')) +master-bin.000002 # Xid # # COMMIT /* XID */ +master-bin.000002 # Gtid # # BEGIN GTID #-#-# 
cid=# +master-bin.000002 # Query # # use `test`; INSERT INTO t3 VALUES (4, foo(14, +'commit_after_release_LOCK_prepare_ordered SIGNAL slave_queued2', +'')) +master-bin.000002 # Xid # # COMMIT /* XID */ +master-bin.000002 # Gtid # # BEGIN GTID #-#-# cid=# +master-bin.000002 # Query # # use `test`; INSERT INTO t3 VALUES (6, foo(16, +'group_commit_waiting_for_prior SIGNAL slave_queued3', +'')) +master-bin.000002 # Xid # # COMMIT /* XID */ +SET debug_sync='now WAIT_FOR slave_queued3'; +ROLLBACK; +SET debug_sync='now WAIT_FOR slave_queued1'; +ROLLBACK; +SET debug_sync='now WAIT_FOR slave_queued2'; +SET debug_sync='now SIGNAL slave_cont1'; +SELECT * FROM t3 ORDER BY a; +a b +1 1 +2 12 +3 3 +4 14 +5 5 +6 16 +7 7 +show binlog events in 'slave-bin.000003' from ; +Log_name Pos Event_type Server_id End_log_pos Info +slave-bin.000003 # Binlog_checkpoint # # slave-bin.000003 +slave-bin.000003 # Gtid # # GTID #-#-# +slave-bin.000003 # Query # # use `test`; CREATE TABLE t3 (a INT PRIMARY KEY, b INT) ENGINE=InnoDB +slave-bin.000003 # Gtid # # BEGIN GTID #-#-# +slave-bin.000003 # Query # # use `test`; INSERT INTO t3 VALUES (1,1), (3,3), (5,5), (7,7) +slave-bin.000003 # Xid # # COMMIT /* XID */ +slave-bin.000003 # Gtid # # BEGIN GTID #-#-# cid=# +slave-bin.000003 # Query # # use `test`; INSERT INTO t3 VALUES (2, foo(12, +'commit_after_release_LOCK_prepare_ordered SIGNAL slave_queued1 WAIT_FOR slave_cont1', +'')) +slave-bin.000003 # Xid # # COMMIT /* XID */ +slave-bin.000003 # Gtid # # BEGIN GTID #-#-# cid=# +slave-bin.000003 # Query # # use `test`; INSERT INTO t3 VALUES (4, foo(14, +'commit_after_release_LOCK_prepare_ordered SIGNAL slave_queued2', +'')) +slave-bin.000003 # Xid # # COMMIT /* XID */ +slave-bin.000003 # Gtid # # BEGIN GTID #-#-# cid=# +slave-bin.000003 # Query # # use `test`; INSERT INTO t3 VALUES (6, foo(16, +'group_commit_waiting_for_prior SIGNAL slave_queued3', +'')) +slave-bin.000003 # Xid # # COMMIT /* XID */ include/stop_slave.inc SET GLOBAL 
binlog_format=@old_format; SET GLOBAL slave_parallel_threads=0; @@ -117,5 +220,5 @@ include/stop_slave.inc SET GLOBAL slave_parallel_threads=@old_parallel_threads; include/start_slave.inc DROP function foo; -DROP TABLE t1,t2; +DROP TABLE t1,t2,t3; include/rpl_end.inc diff --git a/mysql-test/suite/rpl/t/rpl_parallel.test b/mysql-test/suite/rpl/t/rpl_parallel.test index 6054263e4fd..bfc6283da66 100644 --- a/mysql-test/suite/rpl/t/rpl_parallel.test +++ b/mysql-test/suite/rpl/t/rpl_parallel.test @@ -29,7 +29,7 @@ INSERT INTO t2 VALUES (1); --sync_with_master # Block the table t1 to simulate a replicated query taking a long time. ---connect (con_temp,127.0.0.1,root,,test,$SERVER_MYPORT_2,) +--connect (con_temp1,127.0.0.1,root,,test,$SERVER_MYPORT_2,) LOCK TABLE t1 WRITE; --connection server_1 @@ -53,7 +53,7 @@ INSERT INTO t2 VALUES (6); SELECT * FROM t2 ORDER by a; ---connection con_temp +--connection con_temp1 SELECT * FROM t1; UNLOCK TABLES; @@ -125,7 +125,6 @@ INSERT INTO t2 VALUES (foo(11, 'commit_before_enqueue SIGNAL ready3 WAIT_FOR cont3', 'commit_after_release_LOCK_prepare_ordered SIGNAL ready4 WAIT_FOR cont4')); SET gtid_domain_id=0; -SET binlog_format=@old_format; SELECT * FROM t2 WHERE a >= 10 ORDER BY a; --connection server_2 @@ -148,7 +147,124 @@ SELECT * FROM t2 WHERE a >= 10 ORDER BY a; # BINLOG output). --let $binlog_file= slave-bin.000002 --source include/show_binlog_events.inc +FLUSH LOGS; +# Restart all the slave parallel worker threads, to clear all debug_sync actions. +--connection server_2 +--source include/stop_slave.inc +SET GLOBAL slave_parallel_threads=0; +SET GLOBAL slave_parallel_threads=10; +SET debug_sync='RESET'; +--source include/start_slave.inc + + +--echo *** Test that group-committed transactions on the master can replicate in parallel on the slave. 
*** +--connection server_1 +FLUSH LOGS; +CREATE TABLE t3 (a INT PRIMARY KEY, b INT) ENGINE=InnoDB; +# Create some sentinel rows so that the rows inserted in parallel fall into +# separate gaps and do not cause gap lock conflicts. +INSERT INTO t3 VALUES (1,1), (3,3), (5,5), (7,7); +SET binlog_format=@old_format; +--save_master_pos +--connection server_2 +--sync_with_master + +# We want to test that the transactions can execute out-of-order on +# the slave, but still end up committing in-order, and in a single +# group commit. +# +# The idea is to group-commit three transactions together on the master: +# A, B, and C. On the slave, C will execute the insert first, then A, +# and then B. But B manages to complete before A has time to commit, so +# all three end up committing together. +# +# So we start by setting up some row locks that will block transactions +# A and B from executing, allowing C to run first. + +--connection con_temp1 +BEGIN; +INSERT INTO t3 VALUES (2,102); +--connect (con_temp2,127.0.0.1,root,,test,$SERVER_MYPORT_2,) +BEGIN; +INSERT INTO t3 VALUES (4,104); + +# On the master, queue three INSERT transactions as a single group commit. 
+--connect (con_temp3,127.0.0.1,root,,test,$SERVER_MYPORT_1,) +SET debug_sync='commit_after_release_LOCK_prepare_ordered SIGNAL master_queued1 WAIT_FOR master_cont1'; +SET binlog_format=statement; +send INSERT INTO t3 VALUES (2, foo(12, + 'commit_after_release_LOCK_prepare_ordered SIGNAL slave_queued1 WAIT_FOR slave_cont1', + '')); + +--connection server_1 +SET debug_sync='now WAIT_FOR master_queued1'; + +--connect (con_temp4,127.0.0.1,root,,test,$SERVER_MYPORT_1,) +SET debug_sync='commit_after_release_LOCK_prepare_ordered SIGNAL master_queued2'; +SET binlog_format=statement; +send INSERT INTO t3 VALUES (4, foo(14, + 'commit_after_release_LOCK_prepare_ordered SIGNAL slave_queued2', + '')); + +--connection server_1 +SET debug_sync='now WAIT_FOR master_queued2'; + +--connect (con_temp5,127.0.0.1,root,,test,$SERVER_MYPORT_1,) +SET debug_sync='commit_after_release_LOCK_prepare_ordered SIGNAL master_queued3'; +SET binlog_format=statement; +send INSERT INTO t3 VALUES (6, foo(16, + 'group_commit_waiting_for_prior SIGNAL slave_queued3', + '')); + +--connection server_1 +SET debug_sync='now WAIT_FOR master_queued3'; +SET debug_sync='now SIGNAL master_cont1'; + +--connection con_temp3 +REAP; +--connection con_temp4 +REAP; +--connection con_temp5 +REAP; + +--connection server_1 +SELECT * FROM t3 ORDER BY a; +--let $binlog_file= master-bin.000002 +--source include/show_binlog_events.inc + +# First, wait until insert 3 is ready to queue up for group commit, but is +# waiting for insert 2 to commit before it can do so itself. +--connection server_2 +SET debug_sync='now WAIT_FOR slave_queued3'; + +# Next, let insert 1 proceed, and allow it to queue up as the group commit +# leader, but let it wait for insert 2 to also queue up before proceeding. +--connection con_temp1 +ROLLBACK; +--connection server_2 +SET debug_sync='now WAIT_FOR slave_queued1'; + +# Now let insert 2 proceed and queue up. 
+--connection con_temp2 +ROLLBACK; +--connection server_2 +SET debug_sync='now WAIT_FOR slave_queued2'; +# And finally, we can let insert 1 proceed and do the group commit with all +# three insert transactions together. +SET debug_sync='now SIGNAL slave_cont1'; + +# Wait for the commit to complete and check that all three transactions +# group-committed together (will be seen in the binlog as all three having +# cid=# on their GTID event). +--let $wait_condition= SELECT COUNT(*) = 3 FROM t3 WHERE a IN (2,4,6) +--source include/wait_condition.inc +SELECT * FROM t3 ORDER BY a; +--let $binlog_file= slave-bin.000003 +--source include/show_binlog_events.inc + + +--connection server_2 --source include/stop_slave.inc SET GLOBAL binlog_format=@old_format; SET GLOBAL slave_parallel_threads=0; @@ -163,6 +279,6 @@ SET GLOBAL slave_parallel_threads=@old_parallel_threads; --connection server_1 DROP function foo; -DROP TABLE t1,t2; +DROP TABLE t1,t2,t3; --source include/rpl_end.inc diff --git a/sql/log.cc b/sql/log.cc index 4b5826f0097..f965f5963b3 100644 --- a/sql/log.cc +++ b/sql/log.cc @@ -6573,11 +6573,12 @@ MYSQL_BIN_LOG::write_transaction_to_binlog(THD *thd, */ bool -MYSQL_BIN_LOG::queue_for_group_commit(group_commit_entry *entry) +MYSQL_BIN_LOG::queue_for_group_commit(group_commit_entry *orig_entry) { - group_commit_entry *orig_queue; + group_commit_entry *entry, *orig_queue; wait_for_commit *list, *cur, *last; wait_for_commit *wfc; + DBUG_ENTER("MYSQL_BIN_LOG::queue_for_group_commit"); /* Check if we need to wait for another transaction to commit before us. @@ -6587,8 +6588,8 @@ MYSQL_BIN_LOG::queue_for_group_commit(group_commit_entry *entry) another safe check under lock, to avoid the race where the other transaction wakes us up between the check and the wait. 
*/ - wfc= entry->thd->wait_for_commit_ptr; - entry->queued_by_other= false; + wfc= orig_entry->thd->wait_for_commit_ptr; + orig_entry->queued_by_other= false; if (wfc && wfc->waiting_for_commit) { mysql_mutex_lock(&wfc->LOCK_wait_commit); @@ -6604,12 +6605,15 @@ MYSQL_BIN_LOG::queue_for_group_commit(group_commit_entry *entry) get us included in its own group commit. If this happens, the queued_by_other flag is set. */ - wfc->opaque_pointer= entry; + wfc->opaque_pointer= orig_entry; + DEBUG_SYNC(orig_entry->thd, "group_commit_waiting_for_prior"); do { mysql_cond_wait(&wfc->COND_wait_commit, &wfc->LOCK_wait_commit); } while (wfc->waiting_for_commit); wfc->opaque_pointer= NULL; + DBUG_PRINT("info", ("After waiting for prior commit, queued_by_other=%d", + orig_entry->queued_by_other)); } mysql_mutex_unlock(&wfc->LOCK_wait_commit); } @@ -6619,12 +6623,12 @@ MYSQL_BIN_LOG::queue_for_group_commit(group_commit_entry *entry) commit queue (and possibly already done the entire binlog commit for us), then there is nothing else to do. */ - if (entry->queued_by_other) - return false; + if (orig_entry->queued_by_other) + DBUG_RETURN(false); /* Now enqueue ourselves in the group commit queue. */ - DEBUG_SYNC(entry->thd, "commit_before_enqueue"); - entry->thd->clear_wakeup_ready(); + DEBUG_SYNC(orig_entry->thd, "commit_before_enqueue"); + orig_entry->thd->clear_wakeup_ready(); mysql_mutex_lock(&LOCK_prepare_ordered); orig_queue= group_commit_queue; @@ -6657,6 +6661,7 @@ MYSQL_BIN_LOG::queue_for_group_commit(group_commit_entry *entry) list= wfc; cur= list; last= list; + entry= orig_entry; for (;;) { /* Add the entry to the group commit queue. 
*/ @@ -6783,9 +6788,11 @@ MYSQL_BIN_LOG::queue_for_group_commit(group_commit_entry *entry) if (opt_binlog_commit_wait_count > 0) mysql_cond_signal(&COND_prepare_ordered); mysql_mutex_unlock(&LOCK_prepare_ordered); - DEBUG_SYNC(entry->thd, "commit_after_release_LOCK_prepare_ordered"); + DEBUG_SYNC(orig_entry->thd, "commit_after_release_LOCK_prepare_ordered"); - return orig_queue == NULL; + DBUG_PRINT("info", ("Queued for group commit as %s\n", + (orig_queue == NULL) ? "leader" : "participant")); + DBUG_RETURN(orig_queue == NULL); } bool From c168c49d77f13a4e9e6fe07169baeff70dd9eb24 Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 19 Sep 2013 20:54:08 +0200 Subject: [PATCH 21/41] MDEV-4506: Parallel replication: Fix Windows compiler failure. --- sql/rpl_record.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/rpl_record.h b/sql/rpl_record.h index 7369edf1379..7d17d4f7200 100644 --- a/sql/rpl_record.h +++ b/sql/rpl_record.h @@ -21,7 +21,7 @@ #include #include "my_global.h" /* uchar */ -class rpl_group_info; +struct rpl_group_info; struct TABLE; typedef struct st_bitmap MY_BITMAP; From 5d1d20e40982a09a1f279dfd85b45583195cc5b8 Mon Sep 17 00:00:00 2001 From: unknown Date: Mon, 23 Sep 2013 10:22:46 +0200 Subject: [PATCH 22/41] MDEV-4506: parallel replication. Remove some unnecessary mutex locking. --- sql/log.cc | 32 +++++++++++++------------------- sql/sql_class.cc | 22 ++++++++++------------ 2 files changed, 23 insertions(+), 31 deletions(-) diff --git a/sql/log.cc b/sql/log.cc index f965f5963b3..590c062351c 100644 --- a/sql/log.cc +++ b/sql/log.cc @@ -6756,29 +6756,23 @@ MYSQL_BIN_LOG::queue_for_group_commit(group_commit_entry *orig_entry) DBUG_ASSERT(entry != NULL); } - /* Now we need to clear the wakeup_subsequent_commits_running flags. */ + /* + Now we need to clear the wakeup_subsequent_commits_running flags. + + We need a full memory barrier between walking the list above, and clearing + the flag wakeup_subsequent_commits_running below. 
This barrier is needed + to ensure that no other thread will start to modify the list pointers + before we are done traversing the list. + + But wait_for_commit::wakeup(), which was called above for any other thread + that might modify the list in parallel, does a full memory barrier already + (it locks a mutex). + */ if (list) { for (;;) { - if (list->wakeup_subsequent_commits_running) - { - /* - ToDo: We should not need a full lock/unlock of LOCK_wait_commit - here. All we need is a single (full) memory barrier, to ensure that - the reads of the list above are not reordered with the write of - wakeup_subsequent_commits_running, or with the writes to the list - from other threads that is allowed to happen after - wakeup_subsequent_commits_running has been set to false. - - We do not currently have explicit memory barrier primitives in the - source tree, but if we get them the below mysql_mutex_lock() could - be replaced with a full memory barrier just before the loop. - */ - mysql_mutex_lock(&list->LOCK_wait_commit); - list->wakeup_subsequent_commits_running= false; - mysql_mutex_unlock(&list->LOCK_wait_commit); - } + list->wakeup_subsequent_commits_running= false; if (list == last) break; list= list->next_subsequent_commit; diff --git a/sql/sql_class.cc b/sql/sql_class.cc index 9a638947257..4c433638a77 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -5624,6 +5624,10 @@ wait_for_commit::wakeup() Otherwise we would need to somehow ensure that they were done waking up before we could allow this THD to be destroyed, which would be annoying and unnecessary. + + Note that wakeup_subsequent_commits2() depends on this function being a + full memory barrier (it is, because it takes a mutex lock). + */ mysql_mutex_lock(&LOCK_wait_commit); waiting_for_commit= false; @@ -5755,21 +5759,15 @@ wait_for_commit::wakeup_subsequent_commits2() } /* - ToDo: We should not need a full lock/unlock of LOCK_wait_commit here. 
All - we need is a (full) memory barrier, to ensure that the reads of the list - above are not reordered with the write of - wakeup_subsequent_commits_running, or with the writes to the list from - other threads that is allowed to happen after - wakeup_subsequent_commits_running has been set to false. + We need a full memory barrier between walking the list above, and clearing + the flag wakeup_subsequent_commits_running below. This barrier is needed + to ensure that no other thread will start to modify the list pointers + before we are done traversing the list. - We do not currently have explicit memory barrier primitives in the source - tree, but if we get them the below mysql_mutex_lock() could be replaced - with a full memory barrier. It is probably not important, the lock is not - contented and will likely be in the CPU cache since we took it just before. + But wait_for_commit::wakeup() does a full memory barrier already (it locks + a mutex), so no extra explicit barrier is needed here. */ - mysql_mutex_lock(&LOCK_wait_commit); wakeup_subsequent_commits_running= false; - mysql_mutex_unlock(&LOCK_wait_commit); } From 976606d0318465e40670535f6353849d83d1c78f Mon Sep 17 00:00:00 2001 From: unknown Date: Mon, 23 Sep 2013 14:46:57 +0200 Subject: [PATCH 23/41] MDEV-4506: Parallel replication: After-review fixes. 
--- sql/rpl_parallel.cc | 98 +++++++++++++++++++++++++++++++-------------- sql/rpl_parallel.h | 1 - sql/sql_class.cc | 2 +- 3 files changed, 70 insertions(+), 31 deletions(-) diff --git a/sql/rpl_parallel.cc b/sql/rpl_parallel.cc index def0fe7c756..ce3170bb774 100644 --- a/sql/rpl_parallel.cc +++ b/sql/rpl_parallel.cc @@ -132,7 +132,6 @@ handle_rpl_parallel_thread(void *arg) while (!(events= rpt->event_queue) && !rpt->stop && !thd->killed) mysql_cond_wait(&rpt->COND_rpl_thread, &rpt->LOCK_rpl_thread); /* Mark that this thread is now executing */ - rpt->free= false; rpt->event_queue= rpt->last_in_queue= NULL; thd->exit_cond(old_msg); @@ -216,12 +215,24 @@ handle_rpl_parallel_thread(void *arg) { in_event_group= false; + /* + Remove any left-over registration to wait for a prior commit to + complete. Normally, such wait would already have been removed at + this point by wait_for_prior_commit(), but eg. in error case we + might have skipped waiting, so we would need to remove it explicitly. + */ rgi->commit_orderer.unregister_wait_for_prior_commit(); thd->wait_for_commit_ptr= NULL; /* - Record that we have finished, so other event groups will no - longer attempt to wait for us to commit. + Record that this event group has finished (eg. transaction is + committed, if transactional), so other event groups will no longer + attempt to wait for us to commit. Once we have increased + entry->last_committed_sub_id, no other threads will execute + register_wait_for_prior_commit() against us. Thus, by doing one + extra (usually redundant) wakeup_subsequent_commits() we can ensure + that no register_wait_for_prior_commit() can ever happen without a + subsequent wakeup_subsequent_commits() to wake it up. We can race here with the next transactions, but that is fine, as long as we check that we do not decrease last_committed_sub_id. 
If @@ -246,6 +257,11 @@ handle_rpl_parallel_thread(void *arg) mysql_mutex_lock(&rpt->LOCK_rpl_thread); if ((events= rpt->event_queue) != NULL) { + /* + Take next group of events from the replication pool. + This is faster than having to wakeup the pool manager thread to give us + a new event. + */ rpt->event_queue= rpt->last_in_queue= NULL; mysql_mutex_unlock(&rpt->LOCK_rpl_thread); goto more_events; @@ -254,7 +270,7 @@ handle_rpl_parallel_thread(void *arg) if (!in_event_group) { rpt->current_entry= NULL; - if (!rpt->stop && !rpt->free) + if (!rpt->stop) { mysql_mutex_lock(&rpt->pool->LOCK_rpl_thread_pool); list= rpt->pool->free_list; @@ -263,7 +279,6 @@ handle_rpl_parallel_thread(void *arg) if (!list) mysql_cond_broadcast(&rpt->pool->COND_rpl_thread_pool); mysql_mutex_unlock(&rpt->pool->LOCK_rpl_thread_pool); - rpt->free= true; } } } @@ -300,6 +315,7 @@ rpl_parallel_change_thread_count(rpl_parallel_thread_pool *pool, uint32 i; rpl_parallel_thread **new_list= NULL; rpl_parallel_thread *new_free_list= NULL; + rpl_parallel_thread *rpt_array= NULL; /* Allocate the new list of threads up-front. @@ -307,10 +323,13 @@ rpl_parallel_change_thread_count(rpl_parallel_thread_pool *pool, to allocate, and will not be left with a half-functional thread pool. 
*/ if (new_count && - !(new_list= (rpl_parallel_thread **)my_malloc(new_count*sizeof(*new_list), - MYF(MY_WME)))) + !my_multi_malloc(MYF(MY_WME|MY_ZEROFILL), + &new_list, new_count*sizeof(*new_list), + &rpt_array, new_count*sizeof(*rpt_array), + NULL)) { - my_error(ER_OUTOFMEMORY, MYF(0), (int(new_count*sizeof(*new_list)))); + my_error(ER_OUTOFMEMORY, MYF(0), (int(new_count*sizeof(*new_list) + + new_count*sizeof(*rpt_array)))); goto err;; } @@ -318,28 +337,16 @@ rpl_parallel_change_thread_count(rpl_parallel_thread_pool *pool, { pthread_t th; - if (!(new_list[i]= (rpl_parallel_thread *)my_malloc(sizeof(*(new_list[i])), - MYF(MY_WME)))) - { - my_error(ER_OUTOFMEMORY, MYF(0), sizeof(*(new_list[i]))); - goto err; - } + new_list[i]= &rpt_array[i]; new_list[i]->delay_start= true; - new_list[i]->running= false; - new_list[i]->stop= false; - new_list[i]->free= false; mysql_mutex_init(key_LOCK_rpl_thread, &new_list[i]->LOCK_rpl_thread, MY_MUTEX_INIT_SLOW); mysql_cond_init(key_COND_rpl_thread, &new_list[i]->COND_rpl_thread, NULL); new_list[i]->pool= pool; - new_list[i]->current_entry= NULL; - new_list[i]->event_queue= NULL; - new_list[i]->last_in_queue= NULL; if (mysql_thread_create(key_rpl_parallel_thread, &th, NULL, handle_rpl_parallel_thread, new_list[i])) { my_error(ER_OUT_OF_RESOURCES, MYF(0)); - my_free(new_list[i]); goto err; } new_list[i]->next= new_free_list; @@ -364,6 +371,13 @@ rpl_parallel_change_thread_count(rpl_parallel_thread_pool *pool, mysql_mutex_unlock(&LOCK_active_mi); } + /* + Grab each old thread in turn, and signal it to stop. + + Note that since we require all replication threads to be stopped before + changing the parallel replication worker thread pool, all the threads will + be already idle and will terminate immediately. 
+ */ for (i= 0; i < pool->count; ++i) { rpl_parallel_thread *rpt= pool->get_thread(NULL); @@ -381,7 +395,6 @@ rpl_parallel_change_thread_count(rpl_parallel_thread_pool *pool, mysql_mutex_unlock(&rpt->LOCK_rpl_thread); mysql_mutex_destroy(&rpt->LOCK_rpl_thread); mysql_cond_destroy(&rpt->COND_rpl_thread); - my_free(rpt); } my_free(pool->threads); @@ -409,7 +422,6 @@ err: { while (new_free_list) { - rpl_parallel_thread *next= new_free_list->next; mysql_mutex_lock(&new_free_list->LOCK_rpl_thread); new_free_list->delay_start= false; new_free_list->stop= true; @@ -421,8 +433,7 @@ err: mysql_cond_wait(&new_free_list->COND_rpl_thread, &new_free_list->LOCK_rpl_thread); mysql_mutex_unlock(&new_free_list->LOCK_rpl_thread); - my_free(new_free_list); - new_free_list= next; + new_free_list= new_free_list->next; } my_free(new_list); } @@ -471,6 +482,12 @@ rpl_parallel_thread_pool::destroy() } +/* + Wait for a worker thread to become idle. When one does, grab the thread for + our use and return it. + + Note that we return with the worker threads's LOCK_rpl_thread mutex locked. +*/ struct rpl_parallel_thread * rpl_parallel_thread_pool::get_thread(rpl_parallel_entry *entry) { @@ -571,7 +588,7 @@ rpl_parallel::do_event(rpl_group_info *serial_rgi, Log_event *ev) rpl_parallel_entry *e; rpl_parallel_thread *cur_thread; rpl_parallel_thread::queued_event *qev; - rpl_group_info *rgi; + rpl_group_info *rgi= NULL; Relay_log_info *rli= serial_rgi->rli; enum Log_event_type typ; @@ -596,6 +613,7 @@ rpl_parallel::do_event(rpl_group_info *serial_rgi, Log_event *ev) event_group_new_gtid(rgi, gtid_ev)) { my_error(ER_OUT_OF_RESOURCES, MYF(MY_WME)); + delete rgi; return true; } if ((rgi->deferred_events_collecting= rli->mi->rpl_filter->is_on())) @@ -622,14 +640,33 @@ rpl_parallel::do_event(rpl_group_info *serial_rgi, Log_event *ev) } else { - /* Check if we already have a worker thread for this entry. */ + /* + Check if we already have a worker thread for this entry. 
+ + We continue to queue more events up for the worker thread while it is + still executing the first ones, to be able to start executing a large + event group without having to wait for the end to be fetched from the + master. And we continue to queue up more events after the first group, + avoiding the overhead of worker threads constantly entering and + leaving the worker thread free list. + + But if the worker thread is idle at any point, it may return to the + idle list or be servicing a different request. So check this, and + allocate a new thread if the old one is no longer processing for us. + */ cur_thread= e->rpl_thread; if (cur_thread) { mysql_mutex_lock(&cur_thread->LOCK_rpl_thread); if (cur_thread->current_entry != e) { - /* Not ours anymore, we need to grab a new one. */ + /* + The worker thread became idle, and returned to the free list and + possibly was allocated to a different request. This also means + that everything previously queued has already been executed, else + the worker thread would not have become idle. So we should + allocate a new worker thread. + */ mysql_mutex_unlock(&cur_thread->LOCK_rpl_thread); e->rpl_thread= cur_thread= NULL; } @@ -682,6 +719,9 @@ rpl_parallel::do_event(rpl_group_info *serial_rgi, Log_event *ev) Events like ROTATE and FORMAT_DESCRIPTION. Do not run in worker thread. Same for events not preceeded by GTID (we should not see those normally, but they might be from an old master). + + The varuable `current' is NULL for the case where the master did not + have GTID, like a MariaDB 5.5 or MySQL master. 
*/ qev->rgi= serial_rgi; rpt_handle_event(qev, NULL); @@ -719,8 +759,8 @@ rpl_parallel::do_event(rpl_group_info *serial_rgi, Log_event *ev) else cur_thread->event_queue= qev; cur_thread->last_in_queue= qev; - mysql_cond_signal(&cur_thread->COND_rpl_thread); mysql_mutex_unlock(&cur_thread->LOCK_rpl_thread); + mysql_cond_signal(&cur_thread->COND_rpl_thread); return false; } diff --git a/sql/rpl_parallel.h b/sql/rpl_parallel.h index d30540c3d03..1edce8f047d 100644 --- a/sql/rpl_parallel.h +++ b/sql/rpl_parallel.h @@ -13,7 +13,6 @@ struct rpl_parallel_thread { bool delay_start; bool running; bool stop; - bool free; mysql_mutex_t LOCK_rpl_thread; mysql_cond_t COND_rpl_thread; struct rpl_parallel_thread *next; /* For free list. */ diff --git a/sql/sql_class.cc b/sql/sql_class.cc index 4c433638a77..714adfba8f7 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -5631,8 +5631,8 @@ wait_for_commit::wakeup() */ mysql_mutex_lock(&LOCK_wait_commit); waiting_for_commit= false; - mysql_cond_signal(&COND_wait_commit); mysql_mutex_unlock(&LOCK_wait_commit); + mysql_cond_signal(&COND_wait_commit); } From 45c3c71513b68b8de79f3e0a5e9779e7e8021716 Mon Sep 17 00:00:00 2001 From: unknown Date: Mon, 30 Sep 2013 10:41:41 +0200 Subject: [PATCH 24/41] MDEV-4506: Parallel replication. .result file updates + a few comment updates. 
--- mysql-test/suite/innodb/r/group_commit_binlog_pos.result | 2 +- .../r/group_commit_binlog_pos_no_optimize_thread.result | 2 +- sql/rpl_parallel.cc | 7 ++++--- sql/rpl_parallel.h | 4 ++++ 4 files changed, 10 insertions(+), 5 deletions(-) diff --git a/mysql-test/suite/innodb/r/group_commit_binlog_pos.result b/mysql-test/suite/innodb/r/group_commit_binlog_pos.result index c8b80a037a7..23f80b01a8d 100644 --- a/mysql-test/suite/innodb/r/group_commit_binlog_pos.result +++ b/mysql-test/suite/innodb/r/group_commit_binlog_pos.result @@ -31,6 +31,6 @@ a 1 2 3 -InnoDB: Last MySQL binlog file position 0 922, file name ./master-bin.000001 +InnoDB: Last MySQL binlog file position 0 926, file name ./master-bin.000001 SET DEBUG_SYNC= 'RESET'; DROP TABLE t1; diff --git a/mysql-test/suite/innodb/r/group_commit_binlog_pos_no_optimize_thread.result b/mysql-test/suite/innodb/r/group_commit_binlog_pos_no_optimize_thread.result index 090b574a962..3ef8a4acc0f 100644 --- a/mysql-test/suite/innodb/r/group_commit_binlog_pos_no_optimize_thread.result +++ b/mysql-test/suite/innodb/r/group_commit_binlog_pos_no_optimize_thread.result @@ -32,6 +32,6 @@ a 1 2 3 -InnoDB: Last MySQL binlog file position 0 922, file name ./master-bin.000001 +InnoDB: Last MySQL binlog file position 0 926, file name ./master-bin.000001 SET DEBUG_SYNC= 'RESET'; DROP TABLE t1; diff --git a/sql/rpl_parallel.cc b/sql/rpl_parallel.cc index ce3170bb774..6c8c5b5c3fa 100644 --- a/sql/rpl_parallel.cc +++ b/sql/rpl_parallel.cc @@ -647,11 +647,12 @@ rpl_parallel::do_event(rpl_group_info *serial_rgi, Log_event *ev) still executing the first ones, to be able to start executing a large event group without having to wait for the end to be fetched from the master. And we continue to queue up more events after the first group, - avoiding the overhead of worker threads constantly entering and - leaving the worker thread free list. 
+ so that we can continue to process subsequent parts of the relay log in + parallel without having to wait for previous long-running events to + complete. But if the worker thread is idle at any point, it may return to the - idle list or be servicing a different request. So check this, and + idle list or start servicing a different request. So check this, and allocate a new thread if the old one is no longer processing for us. */ cur_thread= e->rpl_thread; diff --git a/sql/rpl_parallel.h b/sql/rpl_parallel.h index 1edce8f047d..8dfd0297199 100644 --- a/sql/rpl_parallel.h +++ b/sql/rpl_parallel.h @@ -57,6 +57,10 @@ struct rpl_parallel_entry { uint64 last_committed_sub_id; mysql_mutex_t LOCK_parallel_entry; mysql_cond_t COND_parallel_entry; + /* + The sub_id of the last event group in this replication domain that was + queued for execution by a worker thread. + */ uint64 current_sub_id; rpl_group_info *current_group_info; /* From 12c760ef71167a1ce6e1adaa084fb196b88e2e55 Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 8 Oct 2013 14:36:06 +0200 Subject: [PATCH 25/41] MDEV-4506: Parallel replication. Improve STOP SLAVE in parallel mode. Now, the parallel part will queue the current event group to the end, and then stop queuing any more events. Each worker will complete the current event group, and then just skip any further queued events. 
--- mysql-test/suite/rpl/r/rpl_parallel.result | 45 +++++++++++++- mysql-test/suite/rpl/t/rpl_parallel.test | 68 +++++++++++++++++++++- sql/rpl_parallel.cc | 56 +++++++++++++++--- sql/rpl_parallel.h | 1 + 4 files changed, 161 insertions(+), 9 deletions(-) diff --git a/mysql-test/suite/rpl/r/rpl_parallel.result b/mysql-test/suite/rpl/r/rpl_parallel.result index 2e13d914e62..03b102a1af9 100644 --- a/mysql-test/suite/rpl/r/rpl_parallel.result +++ b/mysql-test/suite/rpl/r/rpl_parallel.result @@ -117,7 +117,6 @@ include/start_slave.inc FLUSH LOGS; CREATE TABLE t3 (a INT PRIMARY KEY, b INT) ENGINE=InnoDB; INSERT INTO t3 VALUES (1,1), (3,3), (5,5), (7,7); -SET binlog_format=@old_format; BEGIN; INSERT INTO t3 VALUES (2,102); BEGIN; @@ -211,6 +210,50 @@ slave-bin.000003 # Query # # use `test`; INSERT INTO t3 VALUES (6, foo(16, 'group_commit_waiting_for_prior SIGNAL slave_queued3', '')) slave-bin.000003 # Xid # # COMMIT /* XID */ +*** Test STOP SLAVE in parallel mode *** +include/stop_slave.inc +SET binlog_direct_non_transactional_updates=0; +SET sql_log_bin=0; +CALL mtr.add_suppression("Statement is unsafe because it accesses a non-transactional table after accessing a transactional table within the same transaction"); +SET sql_log_bin=1; +BEGIN; +INSERT INTO t2 VALUES (20); +INSERT INTO t1 VALUES (20); +INSERT INTO t2 VALUES (21); +INSERT INTO t3 VALUES (20, 20); +COMMIT; +INSERT INTO t3 VALUES(21, 21); +INSERT INTO t3 VALUES(22, 22); +SET binlog_format=@old_format; +BEGIN; +INSERT INTO t2 VALUES (21); +START SLAVE; +STOP SLAVE; +ROLLBACK; +include/wait_for_slave_to_stop.inc +SELECT * FROM t1 WHERE a >= 20 ORDER BY a; +a +20 +SELECT * FROM t2 WHERE a >= 20 ORDER BY a; +a +20 +21 +SELECT * FROM t3 WHERE a >= 20 ORDER BY a; +a b +20 20 +include/start_slave.inc +SELECT * FROM t1 WHERE a >= 20 ORDER BY a; +a +20 +SELECT * FROM t2 WHERE a >= 20 ORDER BY a; +a +20 +21 +SELECT * FROM t3 WHERE a >= 20 ORDER BY a; +a b +20 20 +21 21 +22 22 include/stop_slave.inc SET GLOBAL 
binlog_format=@old_format; SET GLOBAL slave_parallel_threads=0; diff --git a/mysql-test/suite/rpl/t/rpl_parallel.test b/mysql-test/suite/rpl/t/rpl_parallel.test index bfc6283da66..89834b790d6 100644 --- a/mysql-test/suite/rpl/t/rpl_parallel.test +++ b/mysql-test/suite/rpl/t/rpl_parallel.test @@ -165,7 +165,6 @@ CREATE TABLE t3 (a INT PRIMARY KEY, b INT) ENGINE=InnoDB; # Create some sentinel rows so that the rows inserted in parallel fall into # separate gaps and do not cause gap lock conflicts. INSERT INTO t3 VALUES (1,1), (3,3), (5,5), (7,7); -SET binlog_format=@old_format; --save_master_pos --connection server_2 --sync_with_master @@ -264,6 +263,73 @@ SELECT * FROM t3 ORDER BY a; --source include/show_binlog_events.inc +--echo *** Test STOP SLAVE in parallel mode *** +--connection server_2 +--source include/stop_slave.inc + +--connection server_1 +# Set up a couple of transactions. The first will be blocked halfway +# through on a lock, and while it is blocked we initiate STOP SLAVE. +# We then test that the halfway-initiated transaction is allowed to +# complete, but no subsequent ones. +# We have to use statement-based mode and set +# binlog_direct_non_transactional_updates=0; otherwise the binlog will +# be split into two event groups, one for the MyISAM part and one for the +# InnoDB part. +SET binlog_direct_non_transactional_updates=0; +SET sql_log_bin=0; +CALL mtr.add_suppression("Statement is unsafe because it accesses a non-transactional table after accessing a transactional table within the same transaction"); +SET sql_log_bin=1; +BEGIN; +INSERT INTO t2 VALUES (20); +--disable_warnings +INSERT INTO t1 VALUES (20); +--disable_warnings +INSERT INTO t2 VALUES (21); +INSERT INTO t3 VALUES (20, 20); +COMMIT; +INSERT INTO t3 VALUES(21, 21); +INSERT INTO t3 VALUES(22, 22); +SET binlog_format=@old_format; +--save_master_pos + +# Start a connection that will block the replicated transaction halfway. 
+--connection con_temp1 +BEGIN; +INSERT INTO t2 VALUES (21); + +--connection server_2 +START SLAVE; +# Wait for the MyISAM change to be visible, after which replication will wait +# for con_temp1 to roll back. +--let $wait_condition= SELECT COUNT(*) = 1 FROM t1 WHERE a=20 +--source include/wait_condition.inc + +--connection con_temp2 +# Initiate slave stop. It will have to wait for the current event group +# to complete. +send STOP SLAVE; + +--connection con_temp1 +ROLLBACK; + +--connection con_temp2 +reap; + +--connection server_2 +--source include/wait_for_slave_to_stop.inc +# We should see the first transaction applied, but not the two others. +SELECT * FROM t1 WHERE a >= 20 ORDER BY a; +SELECT * FROM t2 WHERE a >= 20 ORDER BY a; +SELECT * FROM t3 WHERE a >= 20 ORDER BY a; + +--source include/start_slave.inc +--sync_with_master +SELECT * FROM t1 WHERE a >= 20 ORDER BY a; +SELECT * FROM t2 WHERE a >= 20 ORDER BY a; +SELECT * FROM t3 WHERE a >= 20 ORDER BY a; + + --connection server_2 --source include/stop_slave.inc SET GLOBAL binlog_format=@old_format; diff --git a/sql/rpl_parallel.cc b/sql/rpl_parallel.cc index 6c8c5b5c3fa..e80512a3580 100644 --- a/sql/rpl_parallel.cc +++ b/sql/rpl_parallel.cc @@ -77,6 +77,28 @@ rpt_handle_event(rpl_parallel_thread::queued_event *qev, } +static bool +sql_worker_killed(THD *thd, rpl_group_info *rgi, bool in_event_group) +{ + if (!rgi->rli->abort_slave && !abort_loop) + return false; + + /* + Do not abort in the middle of an event group that cannot be rolled back. + */ + if ((thd->transaction.all.modified_non_trans_table || + (thd->variables.option_bits & OPTION_KEEP_LOG)) + && in_event_group) + return false; + /* ToDo: should we add some timeout like in sql_slave_killed? 
+ if (rgi->last_event_start_time == 0) + rgi->last_event_start_time= my_time(0); + */ + + return true; +} + + pthread_handler_t handle_rpl_parallel_thread(void *arg) { @@ -131,7 +153,6 @@ handle_rpl_parallel_thread(void *arg) "Waiting for work from SQL thread"); while (!(events= rpt->event_queue) && !rpt->stop && !thd->killed) mysql_cond_wait(&rpt->COND_rpl_thread, &rpt->LOCK_rpl_thread); - /* Mark that this thread is now executing */ rpt->event_queue= rpt->last_in_queue= NULL; thd->exit_cond(old_msg); @@ -159,7 +180,7 @@ handle_rpl_parallel_thread(void *arg) (0 != (static_cast(events->ev)->flags2 & Gtid_log_event::FL_STANDALONE)); - /* Save this, as it gets cleared once event group commits. */ + /* Save this, as it gets cleared when the event group commits. */ event_gtid_sub_id= rgi->gtid_sub_id; rgi->thd= thd; @@ -197,7 +218,16 @@ handle_rpl_parallel_thread(void *arg) thd->wait_for_commit_ptr= &rgi->commit_orderer; } - rpt_handle_event(events, rpt); + /* + If the SQL thread is stopping, we just skip execution of all the + following event groups. We still do all the normal waiting and wakeup + processing between the event groups as a simple way to ensure that + everything is stopped and cleaned up correctly. + */ + if (!sql_worker_killed(thd, rgi, in_event_group)) + rpt_handle_event(events, rpt); + else + thd->wait_for_prior_commit(); end_of_group= in_event_group && @@ -207,7 +237,6 @@ handle_rpl_parallel_thread(void *arg) (!strcmp("COMMIT", ((Query_log_event *)events->ev)->query) || !strcmp("ROLLBACK", ((Query_log_event *)events->ev)->query)))); - /* ToDo: must use rgi here, not rli, for thread safety. 
*/ delete_or_keep_event_post_apply(rgi, event_type, events->ev); my_free(events); @@ -516,7 +545,7 @@ free_rpl_parallel_entry(void *element) rpl_parallel::rpl_parallel() : - current(NULL) + current(NULL), sql_thread_stopping(false) { my_hash_init(&domain_hash, &my_charset_bin, 32, offsetof(rpl_parallel_entry, domain_id), sizeof(uint32), @@ -529,6 +558,7 @@ rpl_parallel::reset() { my_hash_reset(&domain_hash); current= NULL; + sql_thread_stopping= false; } @@ -591,10 +621,22 @@ rpl_parallel::do_event(rpl_group_info *serial_rgi, Log_event *ev) rpl_group_info *rgi= NULL; Relay_log_info *rli= serial_rgi->rli; enum Log_event_type typ; + bool is_group_event; /* ToDo: what to do with this lock?!? */ mysql_mutex_unlock(&rli->data_lock); + /* + Stop queueing additional event groups once the SQL thread is requested to + stop. + */ + if (((typ= ev->get_type_code()) == GTID_EVENT || + !(is_group_event= Log_event::is_group_event(typ))) && + rli->abort_slave) + sql_thread_stopping= true; + if (sql_thread_stopping) + return false; + if (!(qev= (rpl_parallel_thread::queued_event *)my_malloc(sizeof(*qev), MYF(0)))) { @@ -604,7 +646,7 @@ rpl_parallel::do_event(rpl_group_info *serial_rgi, Log_event *ev) qev->ev= ev; qev->next= NULL; - if ((typ= ev->get_type_code()) == GTID_EVENT) + if (typ == GTID_EVENT) { Gtid_log_event *gtid_ev= static_cast(ev); @@ -714,7 +756,7 @@ rpl_parallel::do_event(rpl_group_info *serial_rgi, Log_event *ev) e->current_sub_id= rgi->gtid_sub_id; current= rgi->parallel_entry= e; } - else if (!Log_event::is_group_event(typ) || !current) + else if (!is_group_event || !current) { /* Events like ROTATE and FORMAT_DESCRIPTION. Do not run in worker thread. 
diff --git a/sql/rpl_parallel.h b/sql/rpl_parallel.h index 8dfd0297199..b9106392faf 100644 --- a/sql/rpl_parallel.h +++ b/sql/rpl_parallel.h @@ -76,6 +76,7 @@ struct rpl_parallel_entry { struct rpl_parallel { HASH domain_hash; rpl_parallel_entry *current; + bool sql_thread_stopping; rpl_parallel(); ~rpl_parallel(); From 3784432256a30e4d453dde10c875d8446519e7c1 Mon Sep 17 00:00:00 2001 From: Michael Widenius Date: Sun, 13 Oct 2013 23:20:57 +0300 Subject: [PATCH 26/41] Give a warning, not an error, if the log file size in innodb doesn't match what is on disk This helps when moving from MariaDB 5.5 to MariaDB 10.0 as sometimes the log file size is rounded differently. storage/innobase/srv/srv0start.c: Give a warning, not an error, if the log file size in innodb doesn't match what is on disk storage/xtradb/srv/srv0start.c: Give a warning, not an error, if the log file size in innodb doesn't match what is on disk --- storage/innobase/srv/srv0start.c | 5 +++-- storage/xtradb/srv/srv0start.c | 6 ++++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/storage/innobase/srv/srv0start.c b/storage/innobase/srv/srv0start.c index 86669a50895..0cf3eb6c6b7 100644 --- a/storage/innobase/srv/srv0start.c +++ b/storage/innobase/srv/srv0start.c @@ -621,7 +621,7 @@ open_or_create_log_file( || size_high != srv_calc_high32(srv_log_file_size)) { fprintf(stderr, - "InnoDB: Error: log file %s is" + "InnoDB: Warning: log file %s is" " of different size %lu %lu bytes\n" "InnoDB: than specified in the .cnf" " file %lu %lu bytes!\n", @@ -629,7 +629,8 @@ open_or_create_log_file( (ulong) srv_calc_high32(srv_log_file_size), (ulong) srv_calc_low32(srv_log_file_size)); - return(DB_ERROR); + srv_log_file_size= size + + (((longlong) size_high) << 32); } } else { *log_file_created = TRUE; diff --git a/storage/xtradb/srv/srv0start.c b/storage/xtradb/srv/srv0start.c index 9e0477253cd..bafbd00e654 100644 --- a/storage/xtradb/srv/srv0start.c +++ b/storage/xtradb/srv/srv0start.c @@ -624,7 +624,7 
@@ open_or_create_log_file( || size_high != srv_calc_high32(srv_log_file_size)) { fprintf(stderr, - "InnoDB: Error: log file %s is" + "InnoDB: Warning: log file %s is" " of different size %lu %lu bytes\n" "InnoDB: than specified in the .cnf" " file %lu %lu bytes!\n", @@ -632,7 +632,9 @@ open_or_create_log_file( (ulong) srv_calc_high32(srv_log_file_size), (ulong) srv_calc_low32(srv_log_file_size)); - return(DB_ERROR); + srv_log_file_size= ((size + + (((longlong) size_high) << 32)) / + UNIV_PAGE_SIZE); } } else { *log_file_created = TRUE; From 2e100cc5a493b6a0f6f907e0483a734c7fee2087 Mon Sep 17 00:00:00 2001 From: Michael Widenius Date: Mon, 14 Oct 2013 00:24:05 +0300 Subject: [PATCH 27/41] Fixes for parallel slave: - Made slaves temporary table multi-thread slave safe by adding mutex around save_temporary_table usage. - rli->save_temporary_tables is the active list of all used temporary tables - This is copied to THD->temporary_tables when temporary tables are opened and updated when temporary tables are closed - Added THD->lock_temporary_tables() and THD->unlock_temporary_tables() to simplify this. - Relay_log_info->sql_thd renamed to Relay_log_info->sql_driver_thd to avoid wrong usage for merged code. - Added is_part_of_group() to mark functions that are part of the next function. This replaces setting IN_STMT when events are executed. - Added is_begin(), is_commit() and is_rollback() functions to Query_log_event to simplify code. - If slave_skip_counter is set run things in single threaded mode. This simplifies code for skipping events. - Updating state of relay log (IN_STMT and IN_TRANSACTION) is moved to one single function: update_state_of_relay_log() We can't use OPTION_BEGIN to check for the state anymore as the sql_driver and sql execution threads may be different. 
Clear IN_STMT and IN_TRANSACTION in init_relay_log_pos() and Relay_log_info::cleanup_context() to ensure the flags don't survive slave restarts is_in_group() is now independent of state of executed transaction. - Reset thd->transaction.all.modified_non_trans_table() if we did set it for single table row events. This was mainly for keeping the flag as documented. - Changed slave_open_temp_tables to uint32 to be able to use atomic operators on it. - Relay_log_info::sleep_lock -> rpl_group_info::sleep_lock - Relay_log_info::sleep_cond -> rpl_group_info::sleep_cond - Changed some functions to take rpl_group_info instead of Relay_log_info to make them multi-slave safe and to simplify usage - do_shall_skip() - continue_group() - sql_slave_killed() - next_event() - Simplified arguments to io_slave_killed(), check_io_slave_killed() and sql_slave_killed(); No reason to supply THD as this is part of the given structure. - set_thd_in_use_temporary_tables() removed as in_use is set on usage - Added information to thd_proc_info() which thread is waiting for slave mutex to exit. - In open_table() reuse code from find_temporary_table() Other things: - More DBUG statements - Fixed rpl_incident.test so that it can be run with --debug - More comments - Disabled not used function rpl_connect_master() mysql-test/suite/perfschema/r/all_instances.result: Moved sleep_lock and sleep_cond to rpl_group_info mysql-test/suite/rpl/r/rpl_incident.result: Updated result mysql-test/suite/rpl/t/rpl_incident-master.opt: Not needed anymore mysql-test/suite/rpl/t/rpl_incident.test: Fixed so that the test can be run with --debug sql/handler.cc: More DBUG_PRINT sql/log.cc: More comments sql/log_event.cc: Added DBUG statements do_shall_skip(), continue_group() now takes rpl_group_info param Use is_begin(), is_commit() and is_rollback() functions instead of inspecting query string We don't have to set the slave's temporary tables 'in_use' as this is now done when tables are opened. Removed IN_STMT flag setting. 
This is now done in update_state_of_relay_log() Use IN_TRANSACTION flag to test state of relay log. In rows_event_stmt_cleanup() reset thd->transaction.all.modified_non_trans_table if we had set this before. sql/log_event.h: do_shall_skip(), continue_group() now takes rpl_group_info param Added is_part_of_group() to mark events that are part of the next event. This replaces setting IN_STMT when events are executed. Added is_begin(), is_commit() and is_rollback() functions to Query_log_event to simplify code. sql/log_event_old.cc: Removed IN_STMT flag setting. This is now done in update_state_of_relay_log() do_shall_skip(), continue_group() now takes rpl_group_info param sql/log_event_old.h: Added is_part_of_group() to mark events that are part of the next event. do_shall_skip(), continue_group() now takes rpl_group_info param sql/mysqld.cc: Changed slave_open_temp_tables to uint32 to be able to use atomic operators on it. Relay_log_info::sleep_lock -> Rpl_group_info::sleep_lock Relay_log_info::sleep_cond -> Rpl_group_info::sleep_cond sql/mysqld.h: Updated types and names sql/rpl_gtid.cc: More DBUG sql/rpl_parallel.cc: Updated TODO section Set thd for event that is execution Use new is_begin(), is_commit() and is_rollback() functions. More comments sql/rpl_rli.cc: sql_thd -> sql_driver_thd Relay_log_info::sleep_lock -> rpl_group_info::sleep_lock Relay_log_info::sleep_cond -> rpl_group_info::sleep_cond Clear IN_STMT and IN_TRANSACTION in init_relay_log_pos() and Relay_log_info::cleanup_context() to ensure the flags doesn't survive slave restarts. Reset table->in_use for temporary tables as the table may have been used by another THD. Use IN_TRANSACTION instead of OPTION_BEGIN to check state of relay log. Removed IN_STMT flag setting. This is now done in update_state_of_relay_log() sql/rpl_rli.h: Changed relay log state flags to bit masks instead of bit positions (most other code we have uses bit masks) Added IN_TRANSACTION to mark if we are in a BEGIN ... 
COMMIT section. save_temporary_tables is now thread safe Relay_log_info::sleep_lock -> rpl_group_info::sleep_lock Relay_log_info::sleep_cond -> rpl_group_info::sleep_cond Relay_log_info->sql_thd renamed to Relay_log_info->sql_driver_thd to avoid wrong usage for merged code is_in_group() is now independent of state of executed transaction. sql/slave.cc: Simplified arguments to io_slave_killed(), sql_slave_killed() and check_io_slave_killed(); No reason to supply THD as this is part of the given structure. set_thd_in_use_temporary_tables() removed as in_use is set on usage in sql_base.cc sql_thd -> sql_driver_thd More DBUG Added update_state_of_relay_log() which will calculate the IN_STMT and IN_TRANSACTION state of the relay log after the current element is executed. If slave_skip_counter is set run things in single threaded mode. Simplified arguments to io_slave_killed(), check_io_slave_killed() and sql_slave_killed(); No reason to supply THD as this is part of the given structure. Added information to thd_proc_info() which thread is waiting for slave mutex to exit. Disabled not used function rpl_connect_master() Updated argument to next_event() sql/sql_base.cc: Added mutex around usage of slave's temporary tables. The active list is always kept up to date in sql->rgi_slave->save_temporary_tables. Clear thd->temporary_tables after query (safety) More DBUG When using temporary table, set table->in_use to current thd as the THD may be different for slave threads. Some code is ifdef:ed with REMOVE_AFTER_MERGE_WITH_10 as the given code in 10.0 is not yet in this tree. 
In open_table() reuse code from find_temporary_table() sql/sql_binlog.cc: rli->sql_thd -> rli->sql_driver_thd Remove duplicate setting of rgi->rli sql/sql_class.cc: Added helper functions rgi_lock_temporary_tables() and rgi_unlock_temporary_tables() Would have been nicer to have these inline, but there was no easy way to do that sql/sql_class.h: Added functions to protect slaves temporary tables sql/sql_parse.cc: Added DBUG_PRINT sql/transaction.cc: Added comment --- .../suite/perfschema/r/all_instances.result | 2 - mysql-test/suite/rpl/r/rpl_incident.result | 1 + .../suite/rpl/t/rpl_incident-master.opt | 1 - mysql-test/suite/rpl/t/rpl_incident.test | 7 + sql/handler.cc | 4 + sql/log.cc | 12 +- sql/log_event.cc | 154 ++++---- sql/log_event.h | 55 +-- sql/log_event_old.cc | 28 +- sql/log_event_old.h | 3 +- sql/mysqld.cc | 13 +- sql/mysqld.h | 6 +- sql/rpl_gtid.cc | 15 +- sql/rpl_parallel.cc | 21 +- sql/rpl_rli.cc | 49 ++- sql/rpl_rli.h | 95 +++-- sql/slave.cc | 328 ++++++++++++------ sql/sql_base.cc | 118 +++++-- sql/sql_binlog.cc | 5 +- sql/sql_class.cc | 18 + sql/sql_class.h | 21 ++ sql/sql_parse.cc | 1 + sql/transaction.cc | 5 + 23 files changed, 603 insertions(+), 359 deletions(-) delete mode 100644 mysql-test/suite/rpl/t/rpl_incident-master.opt diff --git a/mysql-test/suite/perfschema/r/all_instances.result b/mysql-test/suite/perfschema/r/all_instances.result index f338461f5cd..526d4ed30b6 100644 --- a/mysql-test/suite/perfschema/r/all_instances.result +++ b/mysql-test/suite/perfschema/r/all_instances.result @@ -88,7 +88,6 @@ wait/synch/mutex/sql/Query_cache::structure_guard_mutex wait/synch/mutex/sql/Relay_log_info::data_lock wait/synch/mutex/sql/Relay_log_info::log_space_lock wait/synch/mutex/sql/Relay_log_info::run_lock -wait/synch/mutex/sql/Relay_log_info::sleep_lock wait/synch/mutex/sql/Slave_reporting_capability::err_lock wait/synch/mutex/sql/TABLE_SHARE::LOCK_ha_data wait/synch/mutex/sql/THD::LOCK_thd_data @@ -146,7 +145,6 @@ 
wait/synch/cond/sql/MYSQL_RELAY_LOG::update_cond wait/synch/cond/sql/Query_cache::COND_cache_status_changed wait/synch/cond/sql/Relay_log_info::data_cond wait/synch/cond/sql/Relay_log_info::log_space_cond -wait/synch/cond/sql/Relay_log_info::sleep_cond wait/synch/cond/sql/Relay_log_info::start_cond wait/synch/cond/sql/Relay_log_info::stop_cond wait/synch/cond/sql/THD::COND_wakeup_ready diff --git a/mysql-test/suite/rpl/r/rpl_incident.result b/mysql-test/suite/rpl/r/rpl_incident.result index d528fb3297a..5e725e36389 100644 --- a/mysql-test/suite/rpl/r/rpl_incident.result +++ b/mysql-test/suite/rpl/r/rpl_incident.result @@ -8,6 +8,7 @@ a 1 2 3 +SET GLOBAL debug_dbug= '+d,incident_database_resync_on_replace,*'; REPLACE INTO t1 VALUES (4); SELECT * FROM t1; a diff --git a/mysql-test/suite/rpl/t/rpl_incident-master.opt b/mysql-test/suite/rpl/t/rpl_incident-master.opt deleted file mode 100644 index 912801debc4..00000000000 --- a/mysql-test/suite/rpl/t/rpl_incident-master.opt +++ /dev/null @@ -1 +0,0 @@ ---loose-debug=+d,incident_database_resync_on_replace diff --git a/mysql-test/suite/rpl/t/rpl_incident.test b/mysql-test/suite/rpl/t/rpl_incident.test index d6034009f4f..c591a8261c4 100644 --- a/mysql-test/suite/rpl/t/rpl_incident.test +++ b/mysql-test/suite/rpl/t/rpl_incident.test @@ -7,12 +7,19 @@ CREATE TABLE t1 (a INT); INSERT INTO t1 VALUES (1),(2),(3); SELECT * FROM t1; +let $debug_save= `SELECT @@GLOBAL.debug`; +SET GLOBAL debug_dbug= '+d,incident_database_resync_on_replace,*'; + # This will generate an incident log event and store it in the binary # log before the replace statement. 
REPLACE INTO t1 VALUES (4); --save_master_pos SELECT * FROM t1; +--disable_query_log +eval SET GLOBAL debug_dbug= '$debug_save'; +--enable_query_log + connection slave; # Wait until SQL thread stops with error LOST_EVENT on master call mtr.add_suppression("Slave SQL.*The incident LOST_EVENTS occured on the master.* 1590"); diff --git a/sql/handler.cc b/sql/handler.cc index 25b2ee13187..c42204b27d1 100644 --- a/sql/handler.cc +++ b/sql/handler.cc @@ -1247,6 +1247,8 @@ int ha_commit_trans(THD *thd, bool all) bool need_prepare_ordered, need_commit_ordered; my_xid xid; DBUG_ENTER("ha_commit_trans"); + DBUG_PRINT("info",("thd: %p option_bits: %lu all: %d", + thd, (ulong) thd->variables.option_bits, all)); /* Just a random warning to test warnings pushed during autocommit. */ DBUG_EXECUTE_IF("warn_during_ha_commit_trans", @@ -1306,6 +1308,8 @@ int ha_commit_trans(THD *thd, bool all) /* rw_trans is TRUE when we in a transaction changing data */ bool rw_trans= is_real_trans && (rw_ha_count > 0); MDL_request mdl_request; + DBUG_PRINT("info", ("is_real_trans: %d rw_trans: %d rw_ha_count: %d", + is_real_trans, rw_trans, rw_ha_count)); if (rw_trans) { diff --git a/sql/log.cc b/sql/log.cc index 590c062351c..dd6eeb3678c 100644 --- a/sql/log.cc +++ b/sql/log.cc @@ -6554,9 +6554,6 @@ MYSQL_BIN_LOG::write_transaction_to_binlog(THD *thd, the commit and wake them up. This way, all transactions in the queue get committed in a single disk operation. - The return value of this function is TRUE if queued as the first entry in - the queue (meaning this is the leader), FALSE otherwise. - The main work in this function is when the commit in one transaction has been marked to wait for the commit of another transaction to happen first. This is used to support in-order parallel replication, where @@ -6570,6 +6567,10 @@ MYSQL_BIN_LOG::write_transaction_to_binlog(THD *thd, transactions already prepared to commit but just waiting for the first one to commit. 
If so, we add those to the queue as well, transitively for all waiters. + + @retval TRUE If queued as the first entry in the queue (meaning this + is the leader) + @retval FALSE Otherwise */ bool @@ -6657,7 +6658,11 @@ MYSQL_BIN_LOG::queue_for_group_commit(group_commit_entry *orig_entry) The end result is a breath-first traversal of the tree of waiters, re-using the next_subsequent_commit pointers in place of extra stack space in a recursive traversal. + + The temporary list created in next_subsequent_commit is not + used by the caller or any other function. */ + list= wfc; cur= list; last= list; @@ -7239,6 +7244,7 @@ MYSQL_BIN_LOG::write_transaction_or_stmt(group_commit_entry *entry, Note that this function may release and re-acquire LOCK_log and LOCK_prepare_ordered if it needs to wait. */ + void MYSQL_BIN_LOG::wait_for_sufficient_commits() { diff --git a/sql/log_event.cc b/sql/log_event.cc index cfbdd6aa626..59fc856c3f2 100644 --- a/sql/log_event.cc +++ b/sql/log_event.cc @@ -940,6 +940,8 @@ Log_event::Log_event(const char* buf, int Log_event::do_update_pos(rpl_group_info *rgi) { Relay_log_info *rli= rgi->rli; + DBUG_ENTER("Log_event::do_update_pos"); + /* rli is null when (as far as I (Guilhem) know) the caller is Load_log_event::do_apply_event *and* that one is called from @@ -973,13 +975,14 @@ int Log_event::do_update_pos(rpl_group_info *rgi) if (debug_not_change_ts_if_art_event == 0) debug_not_change_ts_if_art_event= 2; ); } - return 0; // Cannot fail currently + DBUG_RETURN(0); // Cannot fail currently } Log_event::enum_skip_reason -Log_event::do_shall_skip(Relay_log_info *rli) +Log_event::do_shall_skip(rpl_group_info *rgi) { + Relay_log_info *rli= rgi->rli; DBUG_PRINT("info", ("ev->server_id: %lu, ::server_id: %lu," " rli->replicate_same_server_id: %d," " rli->slave_skip_counter: %lu", @@ -2525,11 +2528,11 @@ void Log_event::print_timestamp(IO_CACHE* file, time_t* ts) #if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) inline 
Log_event::enum_skip_reason -Log_event::continue_group(Relay_log_info *rli) +Log_event::continue_group(rpl_group_info *rgi) { - if (rli->slave_skip_counter == 1) + if (rgi->rli->slave_skip_counter == 1) return Log_event::EVENT_SKIP_IGNORE; - return Log_event::do_shall_skip(rli); + return Log_event::do_shall_skip(rgi); } #endif @@ -4263,11 +4266,13 @@ int Query_log_event::do_update_pos(rpl_group_info *rgi) Log_event::enum_skip_reason -Query_log_event::do_shall_skip(Relay_log_info *rli) +Query_log_event::do_shall_skip(rpl_group_info *rgi) { + Relay_log_info *rli= rgi->rli; DBUG_ENTER("Query_log_event::do_shall_skip"); DBUG_PRINT("debug", ("query: %s; q_len: %d", query, q_len)); DBUG_ASSERT(query && q_len > 0); + DBUG_ASSERT(thd == rgi->thd); /* An event skipped due to @@skip_replication must not be counted towards the @@ -4279,19 +4284,19 @@ Query_log_event::do_shall_skip(Relay_log_info *rli) if (rli->slave_skip_counter > 0) { - if (strcmp("BEGIN", query) == 0) + if (is_begin()) { thd->variables.option_bits|= OPTION_BEGIN; - DBUG_RETURN(Log_event::continue_group(rli)); + DBUG_RETURN(Log_event::continue_group(rgi)); } - if (strcmp("COMMIT", query) == 0 || strcmp("ROLLBACK", query) == 0) + if (is_commit() || is_rollback()) { thd->variables.option_bits&= ~OPTION_BEGIN; DBUG_RETURN(Log_event::EVENT_SKIP_COUNT); } } - DBUG_RETURN(Log_event::do_shall_skip(rli)); + DBUG_RETURN(Log_event::do_shall_skip(rgi)); } @@ -4465,7 +4470,7 @@ int Start_log_event_v3::do_apply_event(rpl_group_info *rgi) { DBUG_ENTER("Start_log_event_v3::do_apply_event"); int error= 0; - Relay_log_info const *rli= rgi->rli; + Relay_log_info *rli= rgi->rli; switch (binlog_version) { @@ -4479,24 +4484,14 @@ int Start_log_event_v3::do_apply_event(rpl_group_info *rgi) */ if (created) { - error= close_temporary_tables(thd); + rli->close_temporary_tables(); + /* The following is only false if we get here with a BINLOG statement */ if (rli->mi) cleanup_load_tmpdir(&rli->mi->cmp_connection_name); } - else - { - 
/* - Set all temporary tables thread references to the current thread - as they may point to the "old" SQL slave thread in case of its - restart. - */ - TABLE *table; - for (table= thd->temporary_tables; table; table= table->next) - table->in_use= thd; - } break; /* @@ -4511,7 +4506,7 @@ int Start_log_event_v3::do_apply_event(rpl_group_info *rgi) Can distinguish, based on the value of 'created': this event was generated at master startup. */ - error= close_temporary_tables(thd); + rli->close_temporary_tables(); } /* Otherwise, can't distinguish a Start_log_event generated at @@ -4895,7 +4890,7 @@ int Format_description_log_event::do_update_pos(rpl_group_info *rgi) } Log_event::enum_skip_reason -Format_description_log_event::do_shall_skip(Relay_log_info *rli) +Format_description_log_event::do_shall_skip(rpl_group_info *rgi) { return Log_event::EVENT_SKIP_NOT; } @@ -5970,8 +5965,8 @@ int Rotate_log_event::do_update_pos(rpl_group_info *rgi) flush_relay_log_info(rli); /* - Reset thd->variables.option_bits and sql_mode etc, because this could be the signal of - a master's downgrade from 5.0 to 4.0. + Reset thd->variables.option_bits and sql_mode etc, because this could + be the signal of a master's downgrade from 5.0 to 4.0. However, no need to reset description_event_for_exec: indeed, if the next master is 5.0 (even 5.0.1) we will soon get a Format_desc; if the next master is 4.0 then the events are in the slave's format (conversion). 
@@ -5991,9 +5986,9 @@ int Rotate_log_event::do_update_pos(rpl_group_info *rgi) Log_event::enum_skip_reason -Rotate_log_event::do_shall_skip(Relay_log_info *rli) +Rotate_log_event::do_shall_skip(rpl_group_info *rgi) { - enum_skip_reason reason= Log_event::do_shall_skip(rli); + enum_skip_reason reason= Log_event::do_shall_skip(rgi); switch (reason) { case Log_event::EVENT_SKIP_NOT: @@ -6302,8 +6297,9 @@ Gtid_log_event::do_update_pos(rpl_group_info *rgi) Log_event::enum_skip_reason -Gtid_log_event::do_shall_skip(Relay_log_info *rli) +Gtid_log_event::do_shall_skip(rpl_group_info *rgi) { + Relay_log_info *rli= rgi->rli; /* An event skipped due to @@skip_replication must not be counted towards the number of events to be skipped due to @@sql_slave_skip_counter. @@ -6315,10 +6311,13 @@ Gtid_log_event::do_shall_skip(Relay_log_info *rli) if (rli->slave_skip_counter > 0) { if (!(flags2 & FL_STANDALONE)) + { thd->variables.option_bits|= OPTION_BEGIN; - return Log_event::continue_group(rli); + DBUG_ASSERT(rgi->rli->get_flag(Relay_log_info::IN_TRANSACTION)); + } + return Log_event::continue_group(rgi); } - return Log_event::do_shall_skip(rli); + return Log_event::do_shall_skip(rgi); } @@ -6707,13 +6706,6 @@ void Intvar_log_event::print(FILE* file, PRINT_EVENT_INFO* print_event_info) int Intvar_log_event::do_apply_event(rpl_group_info *rgi) { - Relay_log_info *rli= rgi->rli; - /* - We are now in a statement until the associated query log event has - been processed. 
- */ - rli->set_flag(Relay_log_info::IN_STMT); - if (rgi->deferred_events_collecting) return rgi->deferred_events->add(this); @@ -6738,7 +6730,7 @@ int Intvar_log_event::do_update_pos(rpl_group_info *rgi) Log_event::enum_skip_reason -Intvar_log_event::do_shall_skip(Relay_log_info *rli) +Intvar_log_event::do_shall_skip(rpl_group_info *rgi) { /* It is a common error to set the slave skip counter to 1 instead of @@ -6748,7 +6740,7 @@ Intvar_log_event::do_shall_skip(Relay_log_info *rli) that we do not change the value of the slave skip counter since it will be decreased by the following insert event. */ - return continue_group(rli); + return continue_group(rgi); } #endif @@ -6818,13 +6810,6 @@ void Rand_log_event::print(FILE* file, PRINT_EVENT_INFO* print_event_info) #if defined(HAVE_REPLICATION) && !defined(MYSQL_CLIENT) int Rand_log_event::do_apply_event(rpl_group_info *rgi) { - Relay_log_info const *rli= rgi->rli; - /* - We are now in a statement until the associated query log event has - been processed. - */ - const_cast(rli)->set_flag(Relay_log_info::IN_STMT); - if (rgi->deferred_events_collecting) return rgi->deferred_events->add(this); @@ -6842,7 +6827,7 @@ int Rand_log_event::do_update_pos(rpl_group_info *rgi) Log_event::enum_skip_reason -Rand_log_event::do_shall_skip(Relay_log_info *rli) +Rand_log_event::do_shall_skip(rpl_group_info *rgi) { /* It is a common error to set the slave skip counter to 1 instead of @@ -6852,7 +6837,7 @@ Rand_log_event::do_shall_skip(Relay_log_info *rli) that we do not change the value of the slave skip counter since it will be decreased by the following insert event. 
*/ - return continue_group(rli); + return continue_group(rgi); } /** @@ -6998,14 +6983,16 @@ int Xid_log_event::do_apply_event(rpl_group_info *rgi) } Log_event::enum_skip_reason -Xid_log_event::do_shall_skip(Relay_log_info *rli) +Xid_log_event::do_shall_skip(rpl_group_info *rgi) { DBUG_ENTER("Xid_log_event::do_shall_skip"); - if (rli->slave_skip_counter > 0) { + if (rgi->rli->slave_skip_counter > 0) + { + DBUG_ASSERT(!rgi->rli->get_flag(Relay_log_info::IN_TRANSACTION)); thd->variables.option_bits&= ~OPTION_BEGIN; DBUG_RETURN(Log_event::EVENT_SKIP_COUNT); } - DBUG_RETURN(Log_event::do_shall_skip(rli)); + DBUG_RETURN(Log_event::do_shall_skip(rgi)); } #endif /* !MYSQL_CLIENT */ @@ -7418,7 +7405,6 @@ int User_var_log_event::do_apply_event(rpl_group_info *rgi) { Item *it= 0; CHARSET_INFO *charset; - Relay_log_info const *rli= rgi->rli; DBUG_ENTER("User_var_log_event::do_apply_event"); if (rgi->deferred_events_collecting) @@ -7435,12 +7421,6 @@ int User_var_log_event::do_apply_event(rpl_group_info *rgi) double real_val; longlong int_val; - /* - We are now in a statement until the associated query log event has - been processed. - */ - const_cast(rli)->set_flag(Relay_log_info::IN_STMT); - if (is_null) { it= new Item_null(); @@ -7511,7 +7491,7 @@ int User_var_log_event::do_update_pos(rpl_group_info *rgi) } Log_event::enum_skip_reason -User_var_log_event::do_shall_skip(Relay_log_info *rli) +User_var_log_event::do_shall_skip(rpl_group_info *rgi) { /* It is a common error to set the slave skip counter to 1 instead @@ -7521,7 +7501,7 @@ User_var_log_event::do_shall_skip(Relay_log_info *rli) that we do not change the value of the slave skip counter since it will be decreased by the following insert event. */ - return continue_group(rli); + return continue_group(rgi); } #endif /* !MYSQL_CLIENT */ @@ -7724,9 +7704,11 @@ void Stop_log_event::print(FILE* file, PRINT_EVENT_INFO* print_event_info) Start_log_event_v3::do_apply_event(), not here. 
Because if we come here, the master was sane. */ + int Stop_log_event::do_update_pos(rpl_group_info *rgi) { Relay_log_info *rli= rgi->rli; + DBUG_ENTER("Stop_log_event::do_update_pos"); /* We do not want to update master_log pos because we get a rotate event before stop, so by now group_master_log_name is set to the next log. @@ -7734,7 +7716,7 @@ int Stop_log_event::do_update_pos(rpl_group_info *rgi) could give false triggers in MASTER_POS_WAIT() that we have reached the target position when in fact we have not. */ - if (thd->variables.option_bits & OPTION_BEGIN) + if (rli->get_flag(Relay_log_info::IN_TRANSACTION)) rli->inc_event_relay_log_pos(); else { @@ -7742,7 +7724,7 @@ int Stop_log_event::do_update_pos(rpl_group_info *rgi) rli->inc_group_relay_log_pos(0); flush_relay_log_info(rli); } - return 0; + DBUG_RETURN(0); } #endif /* !MYSQL_CLIENT */ @@ -8514,13 +8496,13 @@ int Begin_load_query_log_event::get_create_or_append() const #if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) Log_event::enum_skip_reason -Begin_load_query_log_event::do_shall_skip(Relay_log_info *rli) +Begin_load_query_log_event::do_shall_skip(rpl_group_info *rgi) { /* If the slave skip counter is 1, then we should not start executing on the next event. */ - return continue_group(rli); + return continue_group(rgi); } #endif @@ -9272,17 +9254,6 @@ int Rows_log_event::do_apply_event(rpl_group_info *rgi) */ thd->set_time(when, when_sec_part); - /* - Now we are in a statement and will stay in a statement until we - see a STMT_END_F. - - We set this flag here, before actually applying any rows, in - case the SQL thread is stopped and we need to detect that we're - inside a statement and halting abruptly might cause problems - when restarting. 
- */ - const_cast(rli)->set_flag(Relay_log_info::IN_STMT); - if ( m_width == table->s->fields && bitmap_is_set_all(&m_cols)) set_flags(COMPLETE_ROWS_F); @@ -9442,17 +9413,17 @@ int Rows_log_event::do_apply_event(rpl_group_info *rgi) } Log_event::enum_skip_reason -Rows_log_event::do_shall_skip(Relay_log_info *rli) +Rows_log_event::do_shall_skip(rpl_group_info *rgi) { /* If the slave skip counter is 1 and this event does not end a statement, then we should not start executing on the next event. Otherwise, we defer the decision to the normal skipping logic. */ - if (rli->slave_skip_counter == 1 && !get_flags(STMT_END_F)) + if (rgi->rli->slave_skip_counter == 1 && !get_flags(STMT_END_F)) return Log_event::EVENT_SKIP_IGNORE; else - return Log_event::do_shall_skip(rli); + return Log_event::do_shall_skip(rgi); } /** @@ -9469,6 +9440,8 @@ Rows_log_event::do_shall_skip(Relay_log_info *rli) static int rows_event_stmt_cleanup(rpl_group_info *rgi, THD * thd) { int error; + DBUG_ENTER("rows_event_stmt_cleanup"); + { /* This is the end of a statement or transaction, so close (and @@ -9520,9 +9493,16 @@ static int rows_event_stmt_cleanup(rpl_group_info *rgi, THD * thd) */ thd->reset_current_stmt_binlog_format_row(); + /* + Reset modified_non_trans_table that we have set in + rows_log_event::do_apply_event() + */ + if (!thd->in_multi_stmt_transaction_mode()) + thd->transaction.all.modified_non_trans_table= 0; + rgi->cleanup_context(thd, 0); } - return error; + DBUG_RETURN(error); } /** @@ -9795,9 +9775,9 @@ int Annotate_rows_log_event::do_update_pos(rpl_group_info *rgi) #if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) Log_event::enum_skip_reason -Annotate_rows_log_event::do_shall_skip(Relay_log_info *rli) +Annotate_rows_log_event::do_shall_skip(rpl_group_info *rgi) { - return continue_group(rli); + return continue_group(rgi); } #endif @@ -10265,7 +10245,7 @@ check_table_map(rpl_group_info *rgi, RPL_TABLE_LIST *table_list) enum_tbl_map_status res= OK_TO_PROCESS; 
Relay_log_info *rli= rgi->rli; - if (rli->sql_thd->slave_thread /* filtering is for slave only */ && + if (rgi->thd->slave_thread /* filtering is for slave only */ && (!rli->mi->rpl_filter->db_ok(table_list->db) || (rli->mi->rpl_filter->is_on() && !rli->mi->rpl_filter->tables_ok("", table_list)))) res= FILTERED_OUT; @@ -10316,7 +10296,7 @@ int Table_map_log_event::do_apply_event(rpl_group_info *rgi) DBUG_RETURN(HA_ERR_OUT_OF_MEM); /* call from mysql_client_binlog_statement() will not set rli->mi */ - filter= rli->sql_thd->slave_thread ? rli->mi->rpl_filter : global_rpl_filter; + filter= rgi->thd->slave_thread ? rli->mi->rpl_filter : global_rpl_filter; strmov(db_mem, filter->get_rewrite_db(m_dbnam, &dummy_len)); strmov(tname_mem, m_tblnam); @@ -10404,13 +10384,13 @@ int Table_map_log_event::do_apply_event(rpl_group_info *rgi) } Log_event::enum_skip_reason -Table_map_log_event::do_shall_skip(Relay_log_info *rli) +Table_map_log_event::do_shall_skip(rpl_group_info *rgi) { /* If the slave skip counter is 1, then we should not start executing on the next event. */ - return continue_group(rli); + return continue_group(rgi); } int Table_map_log_event::do_update_pos(rpl_group_info *rgi) diff --git a/sql/log_event.h b/sql/log_event.h index d689ebcd582..6fbd69453b4 100644 --- a/sql/log_event.h +++ b/sql/log_event.h @@ -1342,9 +1342,9 @@ public: @see do_shall_skip */ - enum_skip_reason shall_skip(Relay_log_info *rli) + enum_skip_reason shall_skip(rpl_group_info *rgi) { - return do_shall_skip(rli); + return do_shall_skip(rgi); } @@ -1352,6 +1352,7 @@ public: Check if an event is non-final part of a stand-alone event group, such as Intvar_log_event (such events should be processed as part of the following event group, not individually). + See also is_part_of_group() */ static bool is_part_of_group(enum Log_event_type ev_type) { @@ -1375,6 +1376,11 @@ public: return false; } } + /* + Same as above, but works on the object. 
In addition this is true for all + rows event except the last one. + */ + virtual bool is_part_of_group() { return 0; } static bool is_group_event(enum Log_event_type ev_type) { @@ -1408,14 +1414,14 @@ protected: A typical usage is: @code - enum_skip_reason do_shall_skip(Relay_log_info *rli) { - return continue_group(rli); + enum_skip_reason do_shall_skip(rpl_group_info *rgi) { + return continue_group(rgi); } @endcode @return Skip reason */ - enum_skip_reason continue_group(Relay_log_info *rli); + enum_skip_reason continue_group(rpl_group_info *rgi); /** Primitive to apply an event to the database. @@ -1493,7 +1499,7 @@ protected: The event shall be skipped because the slave skip counter was non-zero. The caller shall decrease the counter by one. */ - virtual enum_skip_reason do_shall_skip(Relay_log_info *rli); + virtual enum_skip_reason do_shall_skip(rpl_group_info *rgi); #endif }; @@ -1985,7 +1991,7 @@ public: public: /* !!! Public in this patch to allow old usage */ #if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) - virtual enum_skip_reason do_shall_skip(Relay_log_info *rli); + virtual enum_skip_reason do_shall_skip(rpl_group_info *rgi); virtual int do_apply_event(rpl_group_info *rgi); virtual int do_update_pos(rpl_group_info *rgi); @@ -2017,6 +2023,9 @@ public: /* !!! 
Public in this patch to allow old usage */ !strncasecmp(query, "SAVEPOINT", 9) || !strncasecmp(query, "ROLLBACK", 8); } + bool is_begin() { return !strcmp(query, "BEGIN"); } + bool is_commit() { return !strcmp(query, "COMMIT"); } + bool is_rollback() { return !strcmp(query, "ROLLBACK"); } }; @@ -2501,7 +2510,7 @@ public: protected: #if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) virtual int do_apply_event(rpl_group_info *rgi); - virtual enum_skip_reason do_shall_skip(Relay_log_info*) + virtual enum_skip_reason do_shall_skip(rpl_group_info*) { /* Events from ourself should be skipped, but they should not @@ -2598,7 +2607,7 @@ protected: #if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) virtual int do_apply_event(rpl_group_info *rgi); virtual int do_update_pos(rpl_group_info *rgi); - virtual enum_skip_reason do_shall_skip(Relay_log_info *rli); + virtual enum_skip_reason do_shall_skip(rpl_group_info *rgi); #endif }; @@ -2672,12 +2681,13 @@ Intvar_log_event(THD* thd_arg,uchar type_arg, ulonglong val_arg, bool write(IO_CACHE* file); #endif bool is_valid() const { return 1; } + bool is_part_of_group() { return 1; } private: #if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) virtual int do_apply_event(rpl_group_info *rgi); virtual int do_update_pos(rpl_group_info *rgi); - virtual enum_skip_reason do_shall_skip(Relay_log_info *rli); + virtual enum_skip_reason do_shall_skip(rpl_group_info *rgi); #endif }; @@ -2751,12 +2761,13 @@ class Rand_log_event: public Log_event bool write(IO_CACHE* file); #endif bool is_valid() const { return 1; } + bool is_part_of_group() { return 1; } private: #if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) virtual int do_apply_event(rpl_group_info *rgi); virtual int do_update_pos(rpl_group_info *rgi); - virtual enum_skip_reason do_shall_skip(Relay_log_info *rli); + virtual enum_skip_reason do_shall_skip(rpl_group_info *rgi); #endif }; @@ -2804,7 +2815,7 @@ class Xid_log_event: public Log_event private: #if 
defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) virtual int do_apply_event(rpl_group_info *rgi); - enum_skip_reason do_shall_skip(Relay_log_info *rli); + enum_skip_reason do_shall_skip(rpl_group_info *rgi); #endif }; @@ -2867,12 +2878,13 @@ public: void set_deferred() { deferred= true; } #endif bool is_valid() const { return name != 0; } + bool is_part_of_group() { return 1; } private: #if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) virtual int do_apply_event(rpl_group_info *rgi); virtual int do_update_pos(rpl_group_info *rgi); - virtual enum_skip_reason do_shall_skip(Relay_log_info *rli); + virtual enum_skip_reason do_shall_skip(rpl_group_info *rgi); #endif }; @@ -2906,7 +2918,7 @@ public: private: #if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) virtual int do_update_pos(rpl_group_info *rgi); - virtual enum_skip_reason do_shall_skip(Relay_log_info *rli) + virtual enum_skip_reason do_shall_skip(rpl_group_info *rgi) { /* Events from ourself should be skipped, but they should not @@ -3008,7 +3020,7 @@ public: private: #if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) virtual int do_update_pos(rpl_group_info *rgi); - virtual enum_skip_reason do_shall_skip(Relay_log_info *rli); + virtual enum_skip_reason do_shall_skip(rpl_group_info *rgi); #endif }; @@ -3121,7 +3133,7 @@ public: void pack_info(THD *thd, Protocol *protocol); virtual int do_apply_event(rpl_group_info *rgi); virtual int do_update_pos(rpl_group_info *rgi); - virtual enum_skip_reason do_shall_skip(Relay_log_info *rli); + virtual enum_skip_reason do_shall_skip(rpl_group_info *rgi); #endif #else void print(FILE *file, PRINT_EVENT_INFO *print_event_info); @@ -3497,7 +3509,7 @@ public: Log_event_type get_type_code() { return BEGIN_LOAD_QUERY_EVENT; } private: #if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) - virtual enum_skip_reason do_shall_skip(Relay_log_info *rli); + virtual enum_skip_reason do_shall_skip(rpl_group_info *rgi); #endif }; @@ -3619,6 +3631,7 @@ public: virtual int 
get_data_size(); virtual Log_event_type get_type_code(); virtual bool is_valid() const; + virtual bool is_part_of_group() { return 1; } #ifndef MYSQL_CLIENT virtual bool write_data_header(IO_CACHE*); @@ -3637,7 +3650,7 @@ public: private: virtual int do_apply_event(rpl_group_info *rgi); virtual int do_update_pos(rpl_group_info *rgi); - virtual enum_skip_reason do_shall_skip(Relay_log_info*); + virtual enum_skip_reason do_shall_skip(rpl_group_info*); #endif private: @@ -4030,6 +4043,7 @@ public: virtual Log_event_type get_type_code() { return TABLE_MAP_EVENT; } virtual bool is_valid() const { return m_memory != NULL; /* we check malloc */ } + virtual bool is_part_of_group() { return 1; } virtual int get_data_size() { return (uint) m_data_size; } #ifdef MYSQL_SERVER @@ -4052,7 +4066,7 @@ private: #if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) virtual int do_apply_event(rpl_group_info *rgi); virtual int do_update_pos(rpl_group_info *rgi); - virtual enum_skip_reason do_shall_skip(Relay_log_info *rli); + virtual enum_skip_reason do_shall_skip(rpl_group_info *rgi); #endif #ifdef MYSQL_SERVER @@ -4195,6 +4209,7 @@ public: { return m_rows_buf && m_cols.bitmap; } + bool is_part_of_group() { return get_flags(STMT_END_F) != 0; } uint m_row_count; /* The number of rows added to the event */ @@ -4280,7 +4295,7 @@ private: #if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) virtual int do_apply_event(rpl_group_info *rgi); virtual int do_update_pos(rpl_group_info *rgi); - virtual enum_skip_reason do_shall_skip(Relay_log_info *rli); + virtual enum_skip_reason do_shall_skip(rpl_group_info *rgi); /* Primitive to prepare for a sequence of row executions. 
diff --git a/sql/log_event_old.cc b/sql/log_event_old.cc index 58f299dabe7..b4f28abcf2b 100644 --- a/sql/log_event_old.cc +++ b/sql/log_event_old.cc @@ -205,17 +205,6 @@ Old_rows_log_event::do_apply_event(Old_rows_log_event *ev, rpl_group_info *rgi) /* A small test to verify that objects have consistent types */ DBUG_ASSERT(sizeof(ev_thd->variables.option_bits) == sizeof(OPTION_RELAXED_UNIQUE_CHECKS)); - /* - Now we are in a statement and will stay in a statement until we - see a STMT_END_F. - - We set this flag here, before actually applying any rows, in - case the SQL thread is stopped and we need to detect that we're - inside a statement and halting abruptly might cause problems - when restarting. - */ - const_cast(rli)->set_flag(Relay_log_info::IN_STMT); - error= do_before_row_operations(table); while (error == 0 && row_start < ev->m_rows_end) { @@ -1613,17 +1602,6 @@ int Old_rows_log_event::do_apply_event(rpl_group_info *rgi) /* A small test to verify that objects have consistent types */ DBUG_ASSERT(sizeof(thd->variables.option_bits) == sizeof(OPTION_RELAXED_UNIQUE_CHECKS)); - /* - Now we are in a statement and will stay in a statement until we - see a STMT_END_F. - - We set this flag here, before actually applying any rows, in - case the SQL thread is stopped and we need to detect that we're - inside a statement and halting abruptly might cause problems - when restarting. - */ - const_cast(rli)->set_flag(Relay_log_info::IN_STMT); - if ( m_width == table->s->fields && bitmap_is_set_all(&m_cols)) set_flags(COMPLETE_ROWS_F); @@ -1820,17 +1798,17 @@ int Old_rows_log_event::do_apply_event(rpl_group_info *rgi) Log_event::enum_skip_reason -Old_rows_log_event::do_shall_skip(Relay_log_info *rli) +Old_rows_log_event::do_shall_skip(rpl_group_info *rgi) { /* If the slave skip counter is 1 and this event does not end a statement, then we should not start executing on the next event. Otherwise, we defer the decision to the normal skipping logic. 
*/ - if (rli->slave_skip_counter == 1 && !get_flags(STMT_END_F)) + if (rgi->rli->slave_skip_counter == 1 && !get_flags(STMT_END_F)) return Log_event::EVENT_SKIP_IGNORE; else - return Log_event::do_shall_skip(rli); + return Log_event::do_shall_skip(rgi); } int diff --git a/sql/log_event_old.h b/sql/log_event_old.h index 01b80439fa1..e5ed25f57ac 100644 --- a/sql/log_event_old.h +++ b/sql/log_event_old.h @@ -145,6 +145,7 @@ public: { return m_rows_buf && m_cols.bitmap; } + bool is_part_of_group() { return 1; } uint m_row_count; /* The number of rows added to the event */ @@ -216,7 +217,7 @@ private: #if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) virtual int do_apply_event(rpl_group_info *rgi); virtual int do_update_pos(rpl_group_info *rgi); - virtual enum_skip_reason do_shall_skip(Relay_log_info *rli); + virtual enum_skip_reason do_shall_skip(rpl_group_info *rgi); /* Primitive to prepare for a sequence of row executions. diff --git a/sql/mysqld.cc b/sql/mysqld.cc index a7fa78838a9..9f1d9e48b1c 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -467,11 +467,12 @@ uint lower_case_table_names; ulong tc_heuristic_recover= 0; int32 thread_count; int32 thread_running; +int32 slave_open_temp_tables; ulong thread_created; ulong back_log, connect_timeout, concurrency, server_id; ulong table_cache_size, table_def_size; ulong what_to_log; -ulong slow_launch_time, slave_open_temp_tables; +ulong slow_launch_time; ulong open_files_limit, max_binlog_size; ulong slave_trans_retries; uint slave_net_timeout; @@ -767,7 +768,7 @@ PSI_mutex_key key_BINLOG_LOCK_index, key_BINLOG_LOCK_xid_list, key_master_info_sleep_lock, key_mutex_slave_reporting_capability_err_lock, key_relay_log_info_data_lock, key_relay_log_info_log_space_lock, key_relay_log_info_run_lock, - key_relay_log_info_sleep_lock, + key_rpl_group_info_sleep_lock, key_structure_guard_mutex, key_TABLE_SHARE_LOCK_ha_data, key_LOCK_error_messages, key_LOG_INFO_lock, key_LOCK_thread_count, key_LOCK_thread_cache, @@ -839,7 
+840,7 @@ static PSI_mutex_info all_server_mutexes[]= { &key_relay_log_info_data_lock, "Relay_log_info::data_lock", 0}, { &key_relay_log_info_log_space_lock, "Relay_log_info::log_space_lock", 0}, { &key_relay_log_info_run_lock, "Relay_log_info::run_lock", 0}, - { &key_relay_log_info_sleep_lock, "Relay_log_info::sleep_lock", 0}, + { &key_rpl_group_info_sleep_lock, "Rpl_group_info::sleep_lock", 0}, { &key_structure_guard_mutex, "Query_cache::structure_guard_mutex", 0}, { &key_TABLE_SHARE_LOCK_ha_data, "TABLE_SHARE::LOCK_ha_data", 0}, { &key_LOCK_error_messages, "LOCK_error_messages", PSI_FLAG_GLOBAL}, @@ -888,7 +889,7 @@ PSI_cond_key key_BINLOG_COND_xid_list, key_BINLOG_update_cond, key_master_info_sleep_cond, key_relay_log_info_data_cond, key_relay_log_info_log_space_cond, key_relay_log_info_start_cond, key_relay_log_info_stop_cond, - key_relay_log_info_sleep_cond, + key_rpl_group_info_sleep_cond, key_TABLE_SHARE_cond, key_user_level_lock_cond, key_COND_thread_count, key_COND_thread_cache, key_COND_flush_thread_cache, key_BINLOG_COND_queue_busy; @@ -934,7 +935,7 @@ static PSI_cond_info all_server_conds[]= { &key_relay_log_info_log_space_cond, "Relay_log_info::log_space_cond", 0}, { &key_relay_log_info_start_cond, "Relay_log_info::start_cond", 0}, { &key_relay_log_info_stop_cond, "Relay_log_info::stop_cond", 0}, - { &key_relay_log_info_sleep_cond, "Relay_log_info::sleep_cond", 0}, + { &key_rpl_group_info_sleep_cond, "Rpl_group_info::sleep_cond", 0}, { &key_TABLE_SHARE_cond, "TABLE_SHARE::cond", 0}, { &key_user_level_lock_cond, "User_level_lock::cond", 0}, { &key_COND_thread_count, "COND_thread_count", PSI_FLAG_GLOBAL}, @@ -7285,7 +7286,7 @@ SHOW_VAR status_vars[]= { {"Select_range", (char*) offsetof(STATUS_VAR, select_range_count), SHOW_LONG_STATUS}, {"Select_range_check", (char*) offsetof(STATUS_VAR, select_range_check_count), SHOW_LONG_STATUS}, {"Select_scan", (char*) offsetof(STATUS_VAR, select_scan_count), SHOW_LONG_STATUS}, - {"Slave_open_temp_tables", (char*) 
&slave_open_temp_tables, SHOW_LONG}, + {"Slave_open_temp_tables", (char*) &slave_open_temp_tables, SHOW_INT}, #ifdef HAVE_REPLICATION {"Slave_retried_transactions",(char*)&slave_retried_transactions, SHOW_LONG}, {"Slave_heartbeat_period", (char*) &show_heartbeat_period, SHOW_SIMPLE_FUNC}, diff --git a/sql/mysqld.h b/sql/mysqld.h index 345e9fa74c9..0bd3687f4fb 100644 --- a/sql/mysqld.h +++ b/sql/mysqld.h @@ -153,7 +153,7 @@ extern ulong delayed_insert_timeout; extern ulong delayed_insert_limit, delayed_queue_size; extern ulong delayed_insert_threads, delayed_insert_writes; extern ulong delayed_rows_in_use,delayed_insert_errors; -extern ulong slave_open_temp_tables; +extern int32 slave_open_temp_tables; extern ulonglong query_cache_size; extern ulong query_cache_min_res_unit; extern ulong slow_launch_threads, slow_launch_time; @@ -246,7 +246,7 @@ extern PSI_mutex_key key_BINLOG_LOCK_index, key_BINLOG_LOCK_xid_list, key_master_info_sleep_lock, key_mutex_slave_reporting_capability_err_lock, key_relay_log_info_data_lock, key_relay_log_info_log_space_lock, key_relay_log_info_run_lock, - key_relay_log_info_sleep_lock, + key_rpl_group_info_sleep_lock, key_structure_guard_mutex, key_TABLE_SHARE_LOCK_ha_data, key_LOCK_error_messages, key_LOCK_thread_count, key_PARTITION_LOCK_auto_inc; extern PSI_mutex_key key_RELAYLOG_LOCK_index; @@ -278,7 +278,7 @@ extern PSI_cond_key key_BINLOG_COND_xid_list, key_BINLOG_update_cond, key_master_info_sleep_cond, key_relay_log_info_data_cond, key_relay_log_info_log_space_cond, key_relay_log_info_start_cond, key_relay_log_info_stop_cond, - key_relay_log_info_sleep_cond, + key_rpl_group_info_sleep_cond, key_TABLE_SHARE_cond, key_user_level_lock_cond, key_COND_thread_count, key_COND_thread_cache, key_COND_flush_thread_cache; extern PSI_cond_key key_RELAYLOG_update_cond, key_COND_wakeup_ready, diff --git a/sql/rpl_gtid.cc b/sql/rpl_gtid.cc index a1b14ad3255..1e393eab502 100644 --- a/sql/rpl_gtid.cc +++ b/sql/rpl_gtid.cc @@ -65,6 +65,7 @@ int 
rpl_slave_state::record_and_update_gtid(THD *thd, rpl_group_info *rgi) { uint64 sub_id; + DBUG_ENTER("rpl_slave_state::record_and_update_gtid"); /* Update the GTID position, if we have it and did not already update @@ -74,10 +75,10 @@ rpl_slave_state::record_and_update_gtid(THD *thd, rpl_group_info *rgi) { rgi->gtid_sub_id= 0; if (record_gtid(thd, &rgi->current_gtid, sub_id, false, false)) - return 1; + DBUG_RETURN(1); update_state_hash(sub_id, &rgi->current_gtid); } - return 0; + DBUG_RETURN(0); } @@ -310,6 +311,7 @@ rpl_slave_state::record_gtid(THD *thd, const rpl_gtid *gtid, uint64 sub_id, element *elem; ulonglong thd_saved_option= thd->variables.option_bits; Query_tables_list lex_backup; + DBUG_ENTER("record_gtid"); if (unlikely(!loaded)) { @@ -320,7 +322,7 @@ rpl_slave_state::record_gtid(THD *thd, const rpl_gtid *gtid, uint64 sub_id, We already complained loudly about this, but we can try to continue until the DBA fixes it. */ - return 0; + DBUG_RETURN(0); } if (!in_statement) @@ -329,7 +331,7 @@ rpl_slave_state::record_gtid(THD *thd, const rpl_gtid *gtid, uint64 sub_id, DBUG_EXECUTE_IF("gtid_inject_record_gtid", { my_error(ER_CANNOT_UPDATE_GTID_STATE, MYF(0)); - return 1; + DBUG_RETURN(1); } ); thd->lex->reset_n_backup_query_tables_list(&lex_backup); @@ -347,8 +349,11 @@ rpl_slave_state::record_gtid(THD *thd, const rpl_gtid *gtid, uint64 sub_id, table->no_replicate= 1; if (!in_transaction) + { + DBUG_PRINT("info", ("resetting OPTION_BEGIN")); thd->variables.option_bits&= ~(ulonglong)(OPTION_NOT_AUTOCOMMIT|OPTION_BEGIN); + } bitmap_set_all(table->write_set); @@ -457,7 +462,7 @@ end: } thd->lex->restore_backup_query_tables_list(&lex_backup); thd->variables.option_bits= thd_saved_option; - return err; + DBUG_RETURN(err); } diff --git a/sql/rpl_parallel.cc b/sql/rpl_parallel.cc index e80512a3580..c10a035c599 100644 --- a/sql/rpl_parallel.cc +++ b/sql/rpl_parallel.cc @@ -43,11 +43,6 @@ slave rolls back the transaction; parallel execution needs to be able to deal 
with this wrt. commit_orderer and such. - - Relay_log_info::is_in_group(). This needs to be handled correctly in all - callers. I think it needs to be split into two, one version in - Relay_log_info to be used from next_event() in slave.cc, one to be used in - per-transaction stuff. - - We should fail if we connect to the master with opt_slave_parallel_threads greater than zero and master does not support GTID. Just to avoid a bunch of potential problems, we won't be able to do any parallel replication @@ -71,6 +66,7 @@ rpt_handle_event(rpl_parallel_thread::queued_event *qev, /* ToDo: Access to thd, and what about rli, split out a parallel part? */ mysql_mutex_lock(&rli->data_lock); + qev->ev->thd= thd; err= apply_event_and_update_pos(qev->ev, thd, rgi, rpt); thd->rgi_slave= NULL; /* ToDo: error handling. */ @@ -234,8 +230,8 @@ handle_rpl_parallel_thread(void *arg) ((group_standalone && !Log_event::is_part_of_group(event_type)) || event_type == XID_EVENT || (event_type == QUERY_EVENT && - (!strcmp("COMMIT", ((Query_log_event *)events->ev)->query) || - !strcmp("ROLLBACK", ((Query_log_event *)events->ev)->query)))); + (((Query_log_event *)events->ev)->is_commit() || + ((Query_log_event *)events->ev)->is_rollback()))); delete_or_keep_event_post_apply(rgi, event_type, events->ev); my_free(events); @@ -612,6 +608,11 @@ rpl_parallel::wait_for_done() } +/* + do_event() is executed by the sql_driver_thd thread. + It's main purpose is to find a thread that can exectue the query. +*/ + bool rpl_parallel::do_event(rpl_group_info *serial_rgi, Log_event *ev) { @@ -718,9 +719,9 @@ rpl_parallel::do_event(rpl_group_info *serial_rgi, Log_event *ev) if (!cur_thread) { /* - Nothing else is currently running in this domain. We can spawn a new - thread to do this event group in parallel with anything else that might - be running in other domains. + Nothing else is currently running in this domain. 
We can + spawn a new thread to do this event group in parallel with + anything else that might be running in other domains. */ cur_thread= e->rpl_thread= global_rpl_thread_pool.get_thread(e); /* get_thread() returns with the LOCK_rpl_thread locked. */ diff --git a/sql/rpl_rli.cc b/sql/rpl_rli.cc index c4b898f74e3..ae2b7558285 100644 --- a/sql/rpl_rli.cc +++ b/sql/rpl_rli.cc @@ -56,7 +56,7 @@ Relay_log_info::Relay_log_info(bool is_slave_recovery) #endif group_master_log_pos(0), log_space_total(0), ignore_log_space_limit(0), last_master_timestamp(0), slave_skip_counter(0), - abort_pos_wait(0), slave_run_id(0), sql_thd(0), + abort_pos_wait(0), slave_run_id(0), sql_driver_thd(), inited(0), abort_slave(0), slave_running(0), until_condition(UNTIL_NONE), until_log_pos(0), retried_trans(0), executed_entries(0), last_event_start_time(0), m_flags(0), @@ -85,12 +85,10 @@ Relay_log_info::Relay_log_info(bool is_slave_recovery) &data_lock, MY_MUTEX_INIT_FAST); mysql_mutex_init(key_relay_log_info_log_space_lock, &log_space_lock, MY_MUTEX_INIT_FAST); - mysql_mutex_init(key_relay_log_info_sleep_lock, &sleep_lock, MY_MUTEX_INIT_FAST); mysql_cond_init(key_relay_log_info_data_cond, &data_cond, NULL); mysql_cond_init(key_relay_log_info_start_cond, &start_cond, NULL); mysql_cond_init(key_relay_log_info_stop_cond, &stop_cond, NULL); mysql_cond_init(key_relay_log_info_log_space_cond, &log_space_cond, NULL); - mysql_cond_init(key_relay_log_info_sleep_cond, &sleep_cond, NULL); relay_log.init_pthread_objects(); DBUG_VOID_RETURN; } @@ -103,12 +101,10 @@ Relay_log_info::~Relay_log_info() mysql_mutex_destroy(&run_lock); mysql_mutex_destroy(&data_lock); mysql_mutex_destroy(&log_space_lock); - mysql_mutex_destroy(&sleep_lock); mysql_cond_destroy(&data_cond); mysql_cond_destroy(&start_cond); mysql_cond_destroy(&stop_cond); mysql_cond_destroy(&log_space_cond); - mysql_cond_destroy(&sleep_cond); relay_log.cleanup(); DBUG_VOID_RETURN; } @@ -523,6 +519,8 @@ int init_relay_log_pos(Relay_log_info* 
rli,const char* log, } rli->group_relay_log_pos = rli->event_relay_log_pos = pos; + rli->clear_flag(Relay_log_info::IN_STMT); + rli->clear_flag(Relay_log_info::IN_TRANSACTION); /* Test to see if the previous run was with the skip of purging @@ -935,6 +933,9 @@ void Relay_log_info::close_temporary_tables() for (table=save_temporary_tables ; table ; table=next) { next=table->next; + + /* Reset in_use as the table may have been created by another thd */ + table->in_use=0; /* Don't ask for disk deletion. For now, anyway they will be deleted when slave restarts, but it is a better intention to not delete them. @@ -1094,9 +1095,9 @@ bool Relay_log_info::is_until_satisfied(THD *thd, Log_event *ev) !replicate_same_server_id) DBUG_RETURN(FALSE); log_name= group_master_log_name; - log_pos= (!ev)? group_master_log_pos : - ((thd->variables.option_bits & OPTION_BEGIN || !ev->log_pos) ? - group_master_log_pos : ev->log_pos - ev->data_written); + log_pos= ((!ev)? group_master_log_pos : + (get_flag(IN_TRANSACTION) || !ev->log_pos) ? + group_master_log_pos : ev->log_pos - ev->data_written); } else { /* until_condition == UNTIL_RELAY_POS */ @@ -1195,7 +1196,7 @@ void Relay_log_info::stmt_done(my_off_t event_master_log_pos, #ifndef DBUG_OFF extern uint debug_not_change_ts_if_art_event; #endif - clear_flag(IN_STMT); + DBUG_ENTER("Relay_log_info::stmt_done"); DBUG_ASSERT(rgi->rli == this); /* @@ -1204,6 +1205,9 @@ void Relay_log_info::stmt_done(my_off_t event_master_log_pos, (not OPTION_NOT_AUTOCOMMIT) as transactions are logged with BEGIN/COMMIT, not with SET AUTOCOMMIT= . + We can't use rgi->rli->get_flag(IN_TRANSACTION) here as OPTION_BEGIN + is also used for single row transactions. 
+ CAUTION: opt_using_transactions means innodb || bdb ; suppose the master supports InnoDB and BDB, but the slave supports only BDB, problems will arise: - suppose an InnoDB table is created on the @@ -1221,7 +1225,8 @@ void Relay_log_info::stmt_done(my_off_t event_master_log_pos, middle of the "transaction". START SLAVE will resume at BEGIN while the MyISAM table has already been updated. */ - if ((rgi->thd->variables.option_bits & OPTION_BEGIN) && opt_using_transactions) + if ((rgi->thd->variables.option_bits & OPTION_BEGIN) && + opt_using_transactions) inc_event_relay_log_pos(); else { @@ -1255,6 +1260,7 @@ void Relay_log_info::stmt_done(my_off_t event_master_log_pos, IF_DBUG(debug_not_change_ts_if_art_event > 0, 1))) last_master_timestamp= event_creation_time; } + DBUG_VOID_RETURN; } #if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) @@ -1417,12 +1423,17 @@ rpl_group_info::rpl_group_info(Relay_log_info *rli_) tables_to_lock_count(0) { bzero(&current_gtid, sizeof(current_gtid)); + mysql_mutex_init(key_rpl_group_info_sleep_lock, &sleep_lock, + MY_MUTEX_INIT_FAST); + mysql_cond_init(key_rpl_group_info_sleep_cond, &sleep_cond, NULL); } rpl_group_info::~rpl_group_info() { free_annotate_event(); + mysql_mutex_destroy(&sleep_lock); + mysql_cond_destroy(&sleep_cond); } @@ -1492,7 +1503,8 @@ delete_or_keep_event_post_apply(rpl_group_info *rgi, void rpl_group_info::cleanup_context(THD *thd, bool error) { DBUG_ENTER("Relay_log_info::cleanup_context"); - + DBUG_PRINT("enter", ("error: %d", (int) error)); + DBUG_ASSERT(this->thd == thd); /* 1) Instances of Table_map_log_event, if ::do_apply_event() was called on them, @@ -1514,9 +1526,20 @@ void rpl_group_info::cleanup_context(THD *thd, bool error) m_table_map.clear_tables(); slave_close_thread_tables(thd); if (error) + { thd->mdl_context.release_transactional_locks(); - /* ToDo: This must clear the flag in rgi, not rli.
*/ - rli->clear_flag(Relay_log_info::IN_STMT); + + if (thd == rli->sql_driver_thd) + { + /* + Reset flags. This is needed to handle incident events and errors in + the relay log noticed by the sql driver thread. + */ + rli->clear_flag(Relay_log_info::IN_STMT); + rli->clear_flag(Relay_log_info::IN_TRANSACTION); + } + } + /* Cleanup for the flags that have been set at do_apply_event. */ diff --git a/sql/rpl_rli.h b/sql/rpl_rli.h index 10181cc6fab..9e96fb8e72c 100644 --- a/sql/rpl_rli.h +++ b/sql/rpl_rli.h @@ -59,14 +59,14 @@ class Relay_log_info : public Slave_reporting_capability { public: /** - Flags for the state of the replication. - */ + Flags for the state of reading the relay log. Note that these are + bit masks. + */ enum enum_state_flag { - /** The replication thread is inside a statement */ - IN_STMT, - - /** Flag counter. Should always be last */ - STATE_FLAGS_COUNT + /** We are inside a group of events forming a statement */ + IN_STMT=1, + /** We have inside a transaction */ + IN_TRANSACTION=2 }; /* @@ -131,9 +131,14 @@ public: IO_CACHE info_file; /* - When we restart slave thread we need to have access to the previously - created temporary tables. Modified only on init/end and by the SQL - thread, read only by SQL thread. + List of temporary tables used by this connection. + This is updated when a temporary table is created or dropped by + a replication thread. + + Not reset when replication ends, to allow one to access the tables + when replication restarts. + + Protected by data_lock. 
*/ TABLE *save_temporary_tables; @@ -141,13 +146,13 @@ public: standard lock acquisition order to avoid deadlocks: run_lock, data_lock, relay_log.LOCK_log, relay_log.LOCK_index */ - mysql_mutex_t data_lock, run_lock, sleep_lock; + mysql_mutex_t data_lock, run_lock; /* start_cond is broadcast when SQL thread is started stop_cond - when stopped data_cond - when data protected by data_lock changes */ - mysql_cond_t start_cond, stop_cond, data_cond, sleep_cond; + mysql_cond_t start_cond, stop_cond, data_cond; /* parent Master_info structure */ Master_info *mi; @@ -164,8 +169,8 @@ public: - an autocommiting query + its associated events (INSERT_ID, TIMESTAMP...) We need these rli coordinates : - - relay log name and position of the beginning of the group we currently are - executing. Needed to know where we have to restart when replication has + - relay log name and position of the beginning of the group we currently + are executing. Needed to know where we have to restart when replication has stopped in the middle of a group (which has been rolled back by the slave). - relay log name and position just after the event we have just executed. This event is part of the current group. @@ -239,7 +244,13 @@ public: ulong max_relay_log_size; mysql_mutex_t log_space_lock; mysql_cond_t log_space_cond; - THD * sql_thd; + /* + THD for the main sql thread, the one that starts threads to process + slave requests. If there is only one thread, then this THD is also + used for SQL processing. + A kill sent to this THD will kill the replication. + */ + THD *sql_driver_thd; #ifndef DBUG_OFF int events_till_abort; #endif @@ -399,6 +410,25 @@ public: time_t event_creation_time, THD *thd, rpl_group_info *rgi); + /** + Is the replication inside a group? + + The reader of the relay log is inside a group if either: + - The IN_TRANSACTION flag is set, meaning we're inside a transaction + - The IN_STMT flag is set, meaning we have read at least one row from + a multi-event entry. 
+ + This flag reflects the state of the log 'just now', ie after the last + read event would be executed. + This allow us to test if we can stop replication before reading + the next entry. + + @retval true Replication thread is currently inside a group + @retval false Replication thread is currently not inside a group + */ + bool is_in_group() const { + return (m_flags & (IN_STMT | IN_TRANSACTION)); + } /** Set the value of a replication state flag. @@ -407,7 +437,7 @@ public: */ void set_flag(enum_state_flag flag) { - m_flags |= (1UL << flag); + m_flags|= flag; } /** @@ -419,7 +449,7 @@ public: */ bool get_flag(enum_state_flag flag) { - return m_flags & (1UL << flag); + return m_flags & flag; } /** @@ -429,22 +459,7 @@ public: */ void clear_flag(enum_state_flag flag) { - m_flags &= ~(1UL << flag); - } - - /** - Is the replication inside a group? - - Replication is inside a group if either: - - The OPTION_BEGIN flag is set, meaning we're inside a transaction - - The RLI_IN_STMT flag is set, meaning we're inside a statement - - @retval true Replication thread is currently inside a group - @retval false Replication thread is currently not inside a group - */ - bool is_in_group() const { - return (sql_thd->variables.option_bits & OPTION_BEGIN) || - (m_flags & (1UL << IN_STMT)); + m_flags&= ~flag; } time_t get_row_stmt_start_timestamp() @@ -482,7 +497,12 @@ public: private: - /* ToDo: This must be moved to rpl_group_info. */ + /* + Holds the state of the data in the relay log. + We need this to ensure that we are not in the middle of a + statement or inside BEGIN ... COMMIT when should rotate the + relay log. + */ uint32 m_flags; /* @@ -503,8 +523,11 @@ private: together. In parallel replication, there will be one rpl_group_info object for - each running thd. All rpl_group_info will share the same Relay_log_info. + each running sql thread, each having their own thd. + + All rpl_group_info will share the same Relay_log_info. 
*/ + struct rpl_group_info { Relay_log_info *rli; @@ -566,6 +589,8 @@ struct rpl_group_info RPL_TABLE_LIST *tables_to_lock; /* RBR: Tables to lock */ uint tables_to_lock_count; /* RBR: Count of tables to lock */ table_mapping m_table_map; /* RBR: Mapping table-id to table */ + mysql_mutex_t sleep_lock; + mysql_cond_t sleep_cond; rpl_group_info(Relay_log_info *rli_); ~rpl_group_info(); diff --git a/sql/slave.cc b/sql/slave.cc index cd4e4254dbc..2504f723a78 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -146,8 +146,8 @@ typedef enum { SLAVE_THD_IO, SLAVE_THD_SQL} SLAVE_THD_TYPE; static int process_io_rotate(Master_info* mi, Rotate_log_event* rev); static int process_io_create_file(Master_info* mi, Create_file_log_event* cev); static bool wait_for_relay_log_space(Relay_log_info* rli); -static inline bool io_slave_killed(THD* thd,Master_info* mi); -static inline bool sql_slave_killed(THD* thd,Relay_log_info* rli); +static bool io_slave_killed(Master_info* mi); +static bool sql_slave_killed(rpl_group_info *rgi); static int init_slave_thread(THD* thd, Master_info *mi, SLAVE_THD_TYPE thd_type); static void print_slave_skip_errors(void); @@ -156,14 +156,14 @@ static int safe_reconnect(THD* thd, MYSQL* mysql, Master_info* mi, bool suppress_warnings); static int connect_to_master(THD* thd, MYSQL* mysql, Master_info* mi, bool reconnect, bool suppress_warnings); -static Log_event* next_event(Relay_log_info* rli); +static Log_event* next_event(rpl_group_info* rgi); static int queue_event(Master_info* mi,const char* buf,ulong event_len); static int terminate_slave_thread(THD *thd, mysql_mutex_t *term_lock, mysql_cond_t *term_cond, volatile uint *slave_running, bool skip_lock); -static bool check_io_slave_killed(THD *thd, Master_info *mi, const char *info); +static bool check_io_slave_killed(Master_info *mi, const char *info); static bool send_show_master_info_header(THD *thd, bool full, size_t gtid_pos_length); static bool send_show_master_info_data(THD *thd, Master_info *mi, 
bool full, @@ -570,13 +570,6 @@ void init_slave_skip_errors(const char* arg) DBUG_VOID_RETURN; } -static void set_thd_in_use_temporary_tables(Relay_log_info *rli) -{ - TABLE *table; - - for (table= rli->save_temporary_tables ; table ; table= table->next) - table->in_use= rli->sql_thd; -} int terminate_slave_threads(Master_info* mi,int thread_mask,bool skip_lock) { @@ -592,7 +585,7 @@ int terminate_slave_threads(Master_info* mi,int thread_mask,bool skip_lock) { DBUG_PRINT("info",("Terminating SQL thread")); mi->rli.abort_slave=1; - if ((error=terminate_slave_thread(mi->rli.sql_thd, sql_lock, + if ((error=terminate_slave_thread(mi->rli.sql_driver_thd, sql_lock, &mi->rli.stop_cond, &mi->rli.slave_running, skip_lock)) && @@ -957,13 +950,12 @@ void end_slave() DBUG_VOID_RETURN; } -static bool io_slave_killed(THD* thd, Master_info* mi) +static bool io_slave_killed(Master_info* mi) { DBUG_ENTER("io_slave_killed"); - DBUG_ASSERT(mi->io_thd == thd); DBUG_ASSERT(mi->slave_running); // tracking buffer overrun - DBUG_RETURN(mi->abort_slave || abort_loop || thd->killed); + DBUG_RETURN(mi->abort_slave || abort_loop || mi->io_thd->killed); } /** @@ -979,26 +971,36 @@ static bool io_slave_killed(THD* thd, Master_info* mi) @return TRUE the killed status is recognized, FALSE a possible killed status is deferred. */ -static bool sql_slave_killed(THD* thd, Relay_log_info* rli) +static bool sql_slave_killed(rpl_group_info *rgi) { bool ret= FALSE; + Relay_log_info *rli= rgi->rli; + THD *thd= rgi->thd; DBUG_ENTER("sql_slave_killed"); - DBUG_ASSERT(rli->sql_thd == thd); + DBUG_ASSERT(rli->sql_driver_thd == thd); DBUG_ASSERT(rli->slave_running == 1);// tracking buffer overrun - if (abort_loop || thd->killed || rli->abort_slave) + if (abort_loop || rli->sql_driver_thd->killed || rli->abort_slave) { /* - The transaction should always be binlogged if OPTION_KEEP_LOG is set - (it implies that something can not be rolled back). 
And such case - should be regarded similarly as modifing a non-transactional table - because retrying of the transaction will lead to an error or inconsistency - as well. - Example: OPTION_KEEP_LOG is set if a temporary table is created or dropped. + The transaction should always be binlogged if OPTION_KEEP_LOG is + set (it implies that something can not be rolled back). And such + case should be regarded similarly as modifing a + non-transactional table because retrying of the transaction will + lead to an error or inconsistency as well. + + Example: OPTION_KEEP_LOG is set if a temporary table is created + or dropped. + + Note that transaction.all.modified_non_trans_table may be 1 + if last statement was a single row transaction without begin/end. + Testing this flag must always be done in connection with + rli->is_in_group(). */ + if ((thd->transaction.all.modified_non_trans_table || - (thd->variables.option_bits & OPTION_KEEP_LOG)) - && rli->is_in_group()) + (thd->variables.option_bits & OPTION_KEEP_LOG)) && + rli->is_in_group()) { char msg_stopped[]= "... Slave SQL Thread stopped with incomplete event group " @@ -1008,20 +1010,28 @@ static bool sql_slave_killed(THD* thd, Relay_log_info* rli) "ignores duplicate key, key not found, and similar errors (see " "documentation for details)."; + DBUG_PRINT("info", ("modified_non_trans_table: %d OPTION_BEGIN: %d " + "is_in_group: %d", + thd->transaction.all.modified_non_trans_table, + test(thd->variables.option_bits & OPTION_BEGIN), + rli->is_in_group())); + if (rli->abort_slave) { - DBUG_PRINT("info", ("Request to stop slave SQL Thread received while " - "applying a group that has non-transactional " - "changes; waiting for completion of the group ... ")); + DBUG_PRINT("info", + ("Request to stop slave SQL Thread received while " + "applying a group that has non-transactional " + "changes; waiting for completion of the group ... 
")); /* - Slave sql thread shutdown in face of unfinished group modified - Non-trans table is handled via a timer. The slave may eventually - give out to complete the current group and in that case there - might be issues at consequent slave restart, see the error message. - WL#2975 offers a robust solution requiring to store the last exectuted - event's coordinates along with the group's coordianates - instead of waiting with @c last_event_start_time the timer. + Slave sql thread shutdown in face of unfinished group + modified Non-trans table is handled via a timer. The slave + may eventually give out to complete the current group and in + that case there might be issues at consequent slave restart, + see the error message. WL#2975 offers a robust solution + requiring to store the last exectuted event's coordinates + along with the group's coordianates instead of waiting with + @c last_event_start_time the timer. */ if (rli->last_event_start_time == 0) @@ -1049,7 +1059,8 @@ static bool sql_slave_killed(THD* thd, Relay_log_info* rli) else { ret= TRUE; - rli->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR, ER(ER_SLAVE_FATAL_ERROR), + rli->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR, + ER(ER_SLAVE_FATAL_ERROR), msg_stopped); } } @@ -1461,7 +1472,7 @@ static int get_master_version_and_clock(MYSQL* mysql, Master_info* mi) mi->clock_diff_with_master= (long) (time((time_t*) 0) - strtoul(master_row[0], 0, 10)); } - else if (check_io_slave_killed(mi->io_thd, mi, NULL)) + else if (check_io_slave_killed(mi, NULL)) goto slave_killed_err; else if (is_network_error(mysql_errno(mysql))) { @@ -1526,7 +1537,7 @@ not always make sense; please check the manual before using it)."; } else if (mysql_errno(mysql)) { - if (check_io_slave_killed(mi->io_thd, mi, NULL)) + if (check_io_slave_killed(mi, NULL)) goto slave_killed_err; else if (is_network_error(mysql_errno(mysql))) { @@ -1599,7 +1610,7 @@ be equal for the Statement-format replication to work"; goto err; } } - else if 
(check_io_slave_killed(mi->io_thd, mi, NULL)) + else if (check_io_slave_killed(mi, NULL)) goto slave_killed_err; else if (is_network_error(mysql_errno(mysql))) { @@ -1662,7 +1673,7 @@ be equal for the Statement-format replication to work"; goto err; } } - else if (check_io_slave_killed(mi->io_thd, mi, NULL)) + else if (check_io_slave_killed(mi, NULL)) goto slave_killed_err; else if (is_network_error(err_code= mysql_errno(mysql))) { @@ -1707,7 +1718,7 @@ when it try to get the value of TIME_ZONE global variable from master."; sprintf(query, query_format, llbuf); if (mysql_real_query(mysql, query, strlen(query)) - && !check_io_slave_killed(mi->io_thd, mi, NULL)) + && !check_io_slave_killed(mi, NULL)) { errmsg= "The slave I/O thread stops because SET @master_heartbeat_period " "on master failed."; @@ -1742,7 +1753,7 @@ when it try to get the value of TIME_ZONE global variable from master."; rc= mysql_real_query(mysql, query, strlen(query)); if (rc != 0) { - if (check_io_slave_killed(mi->io_thd, mi, NULL)) + if (check_io_slave_killed(mi, NULL)) goto slave_killed_err; if (mysql_errno(mysql) == ER_UNKNOWN_SYSTEM_VARIABLE) @@ -1788,7 +1799,7 @@ when it try to get the value of TIME_ZONE global variable from master."; DBUG_ASSERT(mi->checksum_alg_before_fd == BINLOG_CHECKSUM_ALG_OFF || mi->checksum_alg_before_fd == BINLOG_CHECKSUM_ALG_CRC32); } - else if (check_io_slave_killed(mi->io_thd, mi, NULL)) + else if (check_io_slave_killed(mi, NULL)) goto slave_killed_err; else if (is_network_error(mysql_errno(mysql))) { @@ -2052,7 +2063,7 @@ after_set_capability: rpl_global_gtid_slave_state.load(mi->io_thd, master_row[0], strlen(master_row[0]), false, false); } - else if (check_io_slave_killed(mi->io_thd, mi, NULL)) + else if (check_io_slave_killed(mi, NULL)) goto slave_killed_err; else if (is_network_error(mysql_errno(mysql))) { @@ -2118,7 +2129,7 @@ static bool wait_for_relay_log_space(Relay_log_info* rli) "\ Waiting for the slave SQL thread to free enough relay log space"); 
while (rli->log_space_limit < rli->log_space_total && - !(slave_killed=io_slave_killed(thd,mi)) && + !(slave_killed=io_slave_killed(mi)) && !rli->ignore_log_space_limit) mysql_cond_wait(&rli->log_space_cond, &rli->log_space_lock); @@ -2293,7 +2304,7 @@ int register_slave_on_master(MYSQL* mysql, Master_info *mi, { *suppress_warnings= TRUE; // Suppress reconnect warning } - else if (!check_io_slave_killed(mi->io_thd, mi, NULL)) + else if (!check_io_slave_killed(mi, NULL)) { char buf[256]; my_snprintf(buf, sizeof(buf), "%s (Errno: %d)", mysql_error(mysql), @@ -2463,8 +2474,15 @@ static bool send_show_master_info_data(THD *thd, Master_info *mi, bool full, &my_charset_bin); mysql_mutex_lock(&mi->run_lock); if (full) - protocol->store(mi->rli.sql_thd ? mi->rli.sql_thd->proc_info : "", + { + /* + Show what the sql driver replication thread is doing + This is only meaningful if there is only one slave thread. + */ + protocol->store(mi->rli.sql_driver_thd ? + mi->rli.sql_driver_thd->proc_info : "", &my_charset_bin); + } protocol->store(mi->io_thd ? mi->io_thd->proc_info : "", &my_charset_bin); mysql_mutex_unlock(&mi->run_lock); @@ -2797,8 +2815,8 @@ static int init_slave_thread(THD* thd, Master_info *mi, @retval True if the thread has been killed, false otherwise. */ template <typename killed_func, typename rpl_info> -static inline bool slave_sleep(THD *thd, time_t seconds, - killed_func func, rpl_info info) +static bool slave_sleep(THD *thd, time_t seconds, + killed_func func, rpl_info info) { bool ret; @@ -2813,7 +2831,7 @@ static inline bool slave_sleep(THD *thd, time_t seconds, mysql_mutex_lock(lock); old_proc_info= thd->enter_cond(cond, lock, thd->proc_info); - while (! (ret= func(thd, info))) + while (!
(ret= func(info))) { int error= mysql_cond_timedwait(cond, lock, &abstime); if (error == ETIMEDOUT || error == ETIME) @@ -3024,7 +3042,6 @@ int apply_event_and_update_pos(Log_event* ev, THD* thd, { int exec_res= 0; Relay_log_info* rli= rgi->rli; - DBUG_ENTER("apply_event_and_update_pos"); DBUG_PRINT("exec_event",("%s(type_code: %d; server_id: %d)", @@ -3074,7 +3091,7 @@ int apply_event_and_update_pos(Log_event* ev, THD* thd, (ev->flags & LOG_EVENT_SKIP_REPLICATION_F ? OPTION_SKIP_REPLICATION : 0); ev->thd = thd; // because up to this point, ev->thd == 0 - int reason= ev->shall_skip(rli); + int reason= ev->shall_skip(rgi); if (reason == Log_event::EVENT_SKIP_COUNT) { DBUG_ASSERT(rli->slave_skip_counter > 0); @@ -3098,9 +3115,10 @@ int apply_event_and_update_pos(Log_event* ev, THD* thd, // EVENT_SKIP_COUNT "skipped because event skip counter was non-zero" }; - DBUG_PRINT("info", ("OPTION_BEGIN: %d; IN_STMT: %d", + DBUG_PRINT("info", ("OPTION_BEGIN: %d IN_STMT: %d IN_TRANSACTION: %d", test(thd->variables.option_bits & OPTION_BEGIN), - rli->get_flag(Relay_log_info::IN_STMT))); + rli->get_flag(Relay_log_info::IN_STMT), + rli->get_flag(Relay_log_info::IN_TRANSACTION))); DBUG_PRINT("skip_event", ("%s event was %s", ev->get_type_str(), explain[reason])); #endif @@ -3149,6 +3167,80 @@ int apply_event_and_update_pos(Log_event* ev, THD* thd, } +/** + Keep the relay log transaction state up to date. + + The state reflects how things are after the given event, that has just been + read from the relay log, is executed. + + This is only needed to ensure we: + - Don't abort the sql driver thread in the middle of an event group. + - Don't rotate the io thread in the middle of a statement or transaction. + The mechanism is that the io thread, when it needs to rotate the relay + log, will wait until the sql driver has read all the cached events + and then continue reading events one by one from the master until + the sql threads signals that log doesn't have an active group anymore. 
+ + There are two possible cases. We keep them as 2 separate flags mainly + to make debugging easier. + + - IN_STMT is set when we have read an event that should be used + together with the next event. This is for example setting a + variable that is used when executing the next statement. + - IN_TRANSACTION is set when we are inside a BEGIN...COMMIT group + + To test the state one should use the is_in_group() function. +*/ + +inline void update_state_of_relay_log(Relay_log_info *rli, Log_event *ev) +{ + Log_event_type typ= ev->get_type_code(); + + /* check if we are in a multi part event */ + if (ev->is_part_of_group()) + rli->set_flag(Relay_log_info::IN_STMT); + else if (Log_event::is_group_event(typ)) + { + /* + If it was not a is_part_of_group() and not a group event (like + rotate) then we can reset the IN_STMT flag. We have the above + if only to allow us to have a rotate element anywhere. + */ + rli->clear_flag(Relay_log_info::IN_STMT); + } + + /* Check for an event that starts or stops a transaction */ + if (typ == QUERY_EVENT) + { + Query_log_event *qev= (Query_log_event*) ev; + /* + Trivial optimization to avoid the following somewhat expensive + checks. + */ + if (qev->q_len <= sizeof("ROLLBACK")) + { + if (qev->is_begin()) + rli->set_flag(Relay_log_info::IN_TRANSACTION); + if (qev->is_commit() || qev->is_rollback()) + rli->clear_flag(Relay_log_info::IN_TRANSACTION); + } + } + if (typ == XID_EVENT) + rli->clear_flag(Relay_log_info::IN_TRANSACTION); + if (typ == GTID_EVENT && + !(((Gtid_log_event*) ev)->flags2 & Gtid_log_event::FL_STANDALONE)) + { + /* This GTID_EVENT will generate a BEGIN event */ + rli->set_flag(Relay_log_info::IN_TRANSACTION); + } + + DBUG_PRINT("info", ("event: %u IN_STMT: %d IN_TRANSACTION: %d", + (uint) typ, + rli->get_flag(Relay_log_info::IN_STMT), + rli->get_flag(Relay_log_info::IN_TRANSACTION))); +} + + /** Top-level function for executing the next event from the relay log. 
@@ -3177,23 +3269,22 @@ int apply_event_and_update_pos(Log_event* ev, THD* thd, @retval 1 The event was not applied. */ + static int exec_relay_log_event(THD* thd, Relay_log_info* rli, rpl_group_info *serial_rgi) { DBUG_ENTER("exec_relay_log_event"); /* - We acquire this mutex since we need it for all operations except - event execution. But we will release it in places where we will - wait for something for example inside of next_event(). - */ + We acquire this mutex since we need it for all operations except + event execution. But we will release it in places where we will + wait for something for example inside of next_event(). + */ mysql_mutex_lock(&rli->data_lock); - Log_event * ev = next_event(rli); + Log_event * ev = next_event(serial_rgi); - DBUG_ASSERT(rli->sql_thd==thd); - - if (sql_slave_killed(thd,rli)) + if (sql_slave_killed(serial_rgi)) { mysql_mutex_unlock(&rli->data_lock); delete ev; @@ -3216,8 +3307,8 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli, sql_print_information("Slave SQL thread stopped because it reached its" " UNTIL position %s", llstr(rli->until_pos(), buf)); /* - Setting abort_slave flag because we do not want additional message about - error in query execution to be printed. + Setting abort_slave flag because we do not want additional + message about error in query execution to be printed. */ rli->abort_slave= 1; mysql_mutex_unlock(&rli->data_lock); @@ -3245,7 +3336,14 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli, };); } - if (opt_slave_parallel_threads > 0) + update_state_of_relay_log(rli, ev); + + /* + Execute queries in parallel, except if slave_skip_counter is set, + as it's is easier to skip queries in single threaded mode. 
+ */ + + if (opt_slave_parallel_threads > 0 && rli->slave_skip_counter == 0) DBUG_RETURN(rli->parallel.do_event(serial_rgi, ev)); /* @@ -3310,7 +3408,7 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli, serial_rgi->cleanup_context(thd, 1); /* chance for concurrent connection to get more locks */ slave_sleep(thd, min(rli->trans_retries, MAX_SLAVE_RETRY_PAUSE), - sql_slave_killed, rli); + sql_slave_killed, serial_rgi); mysql_mutex_lock(&rli->data_lock); // because of SHOW STATUS rli->trans_retries++; rli->retried_trans++; @@ -3358,9 +3456,9 @@ on this slave.\ } -static bool check_io_slave_killed(THD *thd, Master_info *mi, const char *info) +static bool check_io_slave_killed(Master_info *mi, const char *info) { - if (io_slave_killed(thd, mi)) + if (io_slave_killed(mi)) { if (info && global_system_variables.log_warnings) sql_print_information("%s", info); @@ -3411,7 +3509,7 @@ static int try_to_reconnect(THD *thd, MYSQL *mysql, Master_info *mi, return 1; // Don't retry forever slave_sleep(thd, mi->connect_retry, io_slave_killed, mi); } - if (check_io_slave_killed(thd, mi, messages[SLAVE_RECON_MSG_KILLED_WAITING])) + if (check_io_slave_killed(mi, messages[SLAVE_RECON_MSG_KILLED_WAITING])) return 1; thd->proc_info = messages[SLAVE_RECON_MSG_AFTER]; if (!suppress_warnings) @@ -3448,7 +3546,7 @@ static int try_to_reconnect(THD *thd, MYSQL *mysql, Master_info *mi, sql_print_information("%s", buf); } } - if (safe_reconnect(thd, mysql, mi, 1) || io_slave_killed(thd, mi)) + if (safe_reconnect(thd, mysql, mi, 1) || io_slave_killed(mi)) { if (global_system_variables.log_warnings) sql_print_information("%s", messages[SLAVE_RECON_MSG_KILLED_AFTER]); @@ -3631,11 +3729,14 @@ connected: if (ret == 2) { - if (check_io_slave_killed(mi->io_thd, mi, "Slave I/O thread killed" + if (check_io_slave_killed(mi, "Slave I/O thread killed" "while calling get_master_version_and_clock(...)")) goto err; suppress_warnings= FALSE; - /* Try to reconnect because the error was caused by a 
transient network problem */ + /* + Try to reconnect because the error was caused by a transient network + problem + */ if (try_to_reconnect(thd, mysql, mi, &retry_count, suppress_warnings, reconnect_messages[SLAVE_RECON_ACT_REG])) goto err; @@ -3650,7 +3751,7 @@ connected: thd_proc_info(thd, "Registering slave on master"); if (register_slave_on_master(mysql, mi, &suppress_warnings)) { - if (!check_io_slave_killed(thd, mi, "Slave I/O thread killed " + if (!check_io_slave_killed(mi, "Slave I/O thread killed " "while registering slave on master")) { sql_print_error("Slave I/O thread couldn't register on master"); @@ -3675,13 +3776,13 @@ connected: } DBUG_PRINT("info",("Starting reading binary log from master")); - while (!io_slave_killed(thd,mi)) + while (!io_slave_killed(mi)) { thd_proc_info(thd, "Requesting binlog dump"); if (request_dump(thd, mysql, mi, &suppress_warnings)) { sql_print_error("Failed on request_dump()"); - if (check_io_slave_killed(thd, mi, "Slave I/O thread killed while \ + if (check_io_slave_killed(mi, "Slave I/O thread killed while \ requesting master dump") || try_to_reconnect(thd, mysql, mi, &retry_count, suppress_warnings, reconnect_messages[SLAVE_RECON_ACT_DUMP])) @@ -3701,7 +3802,7 @@ requesting master dump") || const char *event_buf; DBUG_ASSERT(mi->last_error().number == 0); - while (!io_slave_killed(thd,mi)) + while (!io_slave_killed(mi)) { ulong event_len; /* @@ -3712,7 +3813,7 @@ requesting master dump") || */ thd_proc_info(thd, "Waiting for master to send event"); event_len= read_event(mysql, mi, &suppress_warnings); - if (check_io_slave_killed(thd, mi, "Slave I/O thread killed while \ + if (check_io_slave_killed(mi, "Slave I/O thread killed while \ reading event")) goto err; DBUG_EXECUTE_IF("FORCE_SLAVE_TO_RECONNECT_EVENT", @@ -3802,10 +3903,11 @@ Stopping slave I/O thread due to out-of-memory error from master"); - if mi->rli.ignore_log_space_limit is 1 but becomes 0 just after (so the clean value is 0), then we are reading only one 
more event as we should, and we'll block only at the next event. No big deal. - - if mi->rli.ignore_log_space_limit is 0 but becomes 1 just after (so - the clean value is 1), then we are going into wait_for_relay_log_space() - for no reason, but this function will do a clean read, notice the clean - value and exit immediately. + - if mi->rli.ignore_log_space_limit is 0 but becomes 1 just + after (so the clean value is 1), then we are going into + wait_for_relay_log_space() for no reason, but this function + will do a clean read, notice the clean value and exit + immediately. */ #ifndef DBUG_OFF { @@ -3866,7 +3968,7 @@ err: mi->mysql=0; } write_ignored_events_info_to_relay_log(thd, mi); - thd_proc_info(thd, "Waiting for slave mutex on exit"); + thd_proc_info(thd, "Slave io thread waiting for slave mutex on exit"); mysql_mutex_lock(&mi->run_lock); err_during_init: @@ -3996,7 +4098,6 @@ pthread_handler_t handle_slave_sql(void *arg) thd = new THD; // note that contructor of THD uses DBUG_ ! thd->thread_stack = (char*)&thd; // remember where our stack is thd->rpl_filter = mi->rpl_filter; - serial_rgi->thd= thd; DBUG_ASSERT(rli->inited); DBUG_ASSERT(rli->mi == mi); @@ -4007,7 +4108,15 @@ pthread_handler_t handle_slave_sql(void *arg) rli->events_till_abort = abort_slave_event_count; #endif - rli->sql_thd= thd; + /* + THD for the sql driver thd. In parallel replication this is the thread + that reads things from the relay log and calls rpl_parallel::do_event() + to execute queries. + + In single thread replication this is the THD for the thread that is + executing SQL queries too. 
+ */ + serial_rgi->thd= rli->sql_driver_thd= thd; /* Inform waiting threads that slave has started */ rli->slave_run_id++; @@ -4032,8 +4141,6 @@ pthread_handler_t handle_slave_sql(void *arg) serial_rgi->deferred_events= new Deferred_log_events(rli); } - thd->temporary_tables = rli->save_temporary_tables; // restore temp tables - set_thd_in_use_temporary_tables(rli); // (re)set sql_thd in use for saved temp tables /* binlog_annotate_row_events must be TRUE only after an Annotate_rows event has been recieved and only till the last corresponding rbr event has been @@ -4110,7 +4217,6 @@ pthread_handler_t handle_slave_sql(void *arg) #endif } #endif - DBUG_ASSERT(rli->sql_thd == thd); DBUG_PRINT("master_info",("log_file_name: %s position: %s", rli->group_master_log_name, @@ -4193,10 +4299,9 @@ log '%s' at position %s, relay log '%s' position: %s%s", RPL_LOG_NAME, /* Read queries from the IO/THREAD until this thread is killed */ - while (!sql_slave_killed(thd,rli)) + while (!sql_slave_killed(serial_rgi)) { thd_proc_info(thd, "Reading event from the relay log"); - DBUG_ASSERT(rli->sql_thd == thd); THD_CHECK_SENTRY(thd); if (saved_skip && rli->slave_skip_counter == 0) @@ -4217,7 +4322,7 @@ log '%s' at position %s, relay log '%s' position: %s%s", RPL_LOG_NAME, { DBUG_PRINT("info", ("exec_relay_log_event() failed")); // do not scare the user if SQL thread was simply killed or stopped - if (!sql_slave_killed(thd,rli)) + if (!sql_slave_killed(serial_rgi)) { /* retrieve as much info as possible from the thd and, error @@ -4349,7 +4454,7 @@ the slave SQL thread with \"SLAVE START\". 
We stopped at log \ thd->catalog= 0; thd->reset_query(); thd->reset_db(NULL, 0); - thd_proc_info(thd, "Waiting for slave mutex on exit"); + thd_proc_info(thd, "Sql driver thread waiting for slave mutex on exit"); mysql_mutex_lock(&rli->run_lock); err_during_init: /* We need data_lock, at least to wake up any waiting master_pos_wait() */ @@ -4367,17 +4472,14 @@ err_during_init: rli->ignore_log_space_limit= 0; /* don't need any lock */ /* we die so won't remember charset - re-update them on next thread start */ rli->cached_charset_invalidate(); - rli->save_temporary_tables = thd->temporary_tables; /* TODO: see if we can do this conditionally in next_event() instead to avoid unneeded position re-init */ thd->temporary_tables = 0; // remove tempation from destructor to close them - DBUG_ASSERT(rli->sql_thd == thd); THD_CHECK_SENTRY(thd); - rli->sql_thd= 0; - set_thd_in_use_temporary_tables(rli); // (re)set sql_thd in use for saved temp tables + serial_rgi->thd= rli->sql_driver_thd= 0; mysql_mutex_lock(&LOCK_thread_count); THD_CHECK_SENTRY(thd); delete thd; @@ -5474,7 +5576,7 @@ static int connect_to_master(THD* thd, MYSQL* mysql, Master_info* mi, "terminated."); DBUG_RETURN(1); } - while (!(slave_was_killed = io_slave_killed(thd,mi)) && + while (!(slave_was_killed = io_slave_killed(mi)) && (reconnect ? 
mysql_reconnect(mysql) != 0 : mysql_real_connect(mysql, mi->host, mi->user, mi->password, 0, mi->port, 0, client_flag) == 0)) @@ -5552,19 +5654,20 @@ static int safe_reconnect(THD* thd, MYSQL* mysql, Master_info* mi, } +#ifdef NOT_USED MYSQL *rpl_connect_master(MYSQL *mysql) { - THD *thd= current_thd; Master_info *mi= my_pthread_getspecific_ptr(Master_info*, RPL_MASTER_INFO); bool allocated= false; my_bool my_true= 1; + THD *thd; if (!mi) { sql_print_error("'rpl_connect_master' must be called in slave I/O thread context."); return NULL; } - + thd= mi->io_thd; if (!mysql) { if(!(mysql= mysql_init(NULL))) @@ -5607,11 +5710,11 @@ MYSQL *rpl_connect_master(MYSQL *mysql) if (mi->user == NULL || mi->user[0] == 0 - || io_slave_killed(thd, mi) + || io_slave_killed( mi) || !mysql_real_connect(mysql, mi->host, mi->user, mi->password, 0, mi->port, 0, 0)) { - if (!io_slave_killed(thd, mi)) + if (!io_slave_killed( mi)) sql_print_error("rpl_connect_master: error connecting to master: %s (server_error: %d)", mysql_error(mysql), mysql_errno(mysql)); @@ -5621,6 +5724,7 @@ MYSQL *rpl_connect_master(MYSQL *mysql) } return mysql; } +#endif /* Store the file and position where the execute-slave thread are in the @@ -5727,16 +5831,17 @@ static IO_CACHE *reopen_relay_log(Relay_log_info *rli, const char **errmsg) error is reported through the sql_print_information() or sql_print_error() functions. 
*/ -static Log_event* next_event(Relay_log_info* rli) +static Log_event* next_event(rpl_group_info *rgi) { Log_event* ev; + Relay_log_info *rli= rgi->rli; IO_CACHE* cur_log = rli->cur_log; mysql_mutex_t *log_lock = rli->relay_log.get_log_lock(); const char* errmsg=0; - THD* thd = rli->sql_thd; + THD *thd = rgi->thd; DBUG_ENTER("next_event"); - DBUG_ASSERT(thd != 0); + DBUG_ASSERT(thd != 0 && thd == rli->sql_driver_thd); #ifndef DBUG_OFF if (abort_slave_event_count && !rli->events_till_abort--) @@ -5752,7 +5857,7 @@ static Log_event* next_event(Relay_log_info* rli) */ mysql_mutex_assert_owner(&rli->data_lock); - while (!sql_slave_killed(thd,rli)) + while (!sql_slave_killed(rgi)) { /* We can have two kinds of log reading: @@ -5821,7 +5926,6 @@ static Log_event* next_event(Relay_log_info* rli) opt_slave_sql_verify_checksum))) { - DBUG_ASSERT(thd==rli->sql_thd); /* read it while we have a lock, to avoid a mutex lock in inc_event_relay_log_pos() @@ -5832,7 +5936,6 @@ static Log_event* next_event(Relay_log_info* rli) mysql_mutex_unlock(log_lock); DBUG_RETURN(ev); } - DBUG_ASSERT(thd==rli->sql_thd); if (opt_reckless_slave) // For mysql-test cur_log->error = 0; if (cur_log->error < 0) @@ -5920,14 +6023,15 @@ static Log_event* next_event(Relay_log_info* rli) and reads one more event and starts honoring log_space_limit again. If the SQL thread needs more events to be able to rotate the log (it - might need to finish the current group first), then it can ask for one - more at a time. Thus we don't outgrow the relay log indefinitely, + might need to finish the current group first), then it can ask for + one more at a time. Thus we don't outgrow the relay log indefinitely, but rather in a controlled manner, until the next rotate. When the SQL thread starts it sets ignore_log_space_limit to false. 
We should also reset ignore_log_space_limit to 0 when the user does - RESET SLAVE, but in fact, no need as RESET SLAVE requires that the slave - be stopped, and the SQL thread sets ignore_log_space_limit to 0 when + RESET SLAVE, but in fact, no need as RESET SLAVE requires that the + slave be stopped, and the SQL thread sets ignore_log_space_limit + to 0 when it stops. */ mysql_mutex_lock(&rli->log_space_lock); @@ -5965,7 +6069,7 @@ static Log_event* next_event(Relay_log_info* rli) mysql_mutex_unlock(&rli->log_space_lock); mysql_cond_broadcast(&rli->log_space_cond); // Note that wait_for_update_relay_log unlocks lock_log ! - rli->relay_log.wait_for_update_relay_log(rli->sql_thd); + rli->relay_log.wait_for_update_relay_log(rli->sql_driver_thd); // re-acquire data lock since we released it earlier mysql_mutex_lock(&rli->data_lock); rli->last_master_timestamp= save_timestamp; diff --git a/sql/sql_base.cc b/sql/sql_base.cc index 109a4ef41e9..80c0b98fd73 100644 --- a/sql/sql_base.cc +++ b/sql/sql_base.cc @@ -57,6 +57,7 @@ #include "sql_table.h" // build_table_filename #include "datadict.h" // dd_frm_is_view() #include "sql_hset.h" // Hash_set +#include "rpl_rli.h" // rpl_group_info #ifdef __WIN__ #include #endif @@ -1230,11 +1231,24 @@ bool close_cached_connection_tables(THD *thd, LEX_STRING *connection) static void mark_temp_tables_as_free_for_reuse(THD *thd) { + DBUG_ENTER("mark_temp_tables_as_free_for_reuse"); + + thd->lock_temporary_tables(); for (TABLE *table= thd->temporary_tables ; table ; table= table->next) { if ((table->query_id == thd->query_id) && ! table->open_by_handler) mark_tmp_table_for_reuse(table); } + thd->unlock_temporary_tables(); + if (thd->rgi_slave) + { + /* + Temporary tables are shared with other by sql execution threads. + As a safety messure, clear the pointer to the common area. 
+ */ + thd->temporary_tables= 0; + } + DBUG_VOID_RETURN; } @@ -1248,6 +1262,7 @@ static void mark_temp_tables_as_free_for_reuse(THD *thd) void mark_tmp_table_for_reuse(TABLE *table) { + DBUG_ENTER("mark_tmp_table_for_reuse"); DBUG_ASSERT(table->s->tmp_table); table->query_id= 0; @@ -1278,6 +1293,7 @@ void mark_tmp_table_for_reuse(TABLE *table) LOCK TABLES is allowed (but ignored) for a temporary table. */ table->reginfo.lock_type= TL_WRITE; + DBUG_VOID_RETURN; } @@ -1628,6 +1644,10 @@ static inline uint tmpkeyval(THD *thd, TABLE *table) /* Close all temporary tables created by 'CREATE TEMPORARY TABLE' for thread creates one DROP TEMPORARY TABLE binlog event for each pseudo-thread + + Temporary tables created in a sql slave is closed by + Relay_log_info::close_temporary_tables() + */ bool close_temporary_tables(THD *thd) @@ -1642,6 +1662,7 @@ bool close_temporary_tables(THD *thd) if (!thd->temporary_tables) DBUG_RETURN(FALSE); + DBUG_ASSERT(!thd->rgi_slave); if (!mysql_bin_log.is_open()) { @@ -2096,16 +2117,42 @@ TABLE *find_temporary_table(THD *thd, const char *table_key, uint table_key_length) { + TABLE *result= 0; + if (!thd->have_temporary_tables()) + return NULL; + + thd->lock_temporary_tables(); for (TABLE *table= thd->temporary_tables; table; table= table->next) { if (table->s->table_cache_key.length == table_key_length && !memcmp(table->s->table_cache_key.str, table_key, table_key_length)) { - return table; + /* + We need to set the THD as it may be different in case of + parallel replication + */ + if (table->in_use != thd) + { + table->in_use= thd; +#ifdef REMOVE_AFTER_MERGE_WITH_10 + if (thd->rgi_slave) + { + /* + We may be stealing an opened temporary tables from one slave + thread to another, we need to let the performance schema know that, + for aggregates per thread to work properly. 
+ */ + table->file->unbind_psi(); + table->file->rebind_psi(); + } +#endif + } + result= table; + break; } } - - return NULL; + thd->unlock_temporary_tables(); + return result; } @@ -2153,6 +2200,9 @@ int drop_temporary_table(THD *thd, TABLE_LIST *table_list, bool *is_trans) /* Table might be in use by some outer statement. */ if (table->query_id && table->query_id != thd->query_id) { + DBUG_PRINT("info", ("table->query_id: %lu thd->query_id: %lu", + (ulong) table->query_id, (ulong) thd->query_id)); + my_error(ER_CANT_REOPEN_TABLE, MYF(0), table->alias.c_ptr()); DBUG_RETURN(-1); } @@ -2181,6 +2231,7 @@ void close_temporary_table(THD *thd, TABLE *table, table->s->db.str, table->s->table_name.str, (long) table, table->alias.c_ptr())); + thd->lock_temporary_tables(); if (table->prev) { table->prev->next= table->next; @@ -2200,12 +2251,14 @@ void close_temporary_table(THD *thd, TABLE *table, if (thd->temporary_tables) table->next->prev= 0; } - if (thd->slave_thread) + if (thd->rgi_slave) { /* natural invariant of temporary_tables */ DBUG_ASSERT(slave_open_temp_tables || !thd->temporary_tables); - slave_open_temp_tables--; + thread_safe_decrement32(&slave_open_temp_tables, &thread_running_lock); + table->in_use= 0; // No statistics } + thd->unlock_temporary_tables(); close_temporary(table, free_share, delete_table); DBUG_VOID_RETURN; } @@ -2651,35 +2704,30 @@ bool open_table(THD *thd, TABLE_LIST *table_list, MEM_ROOT *mem_root, TODO: move this block into a separate function. */ if (table_list->open_type != OT_BASE_ONLY && - ! (flags & MYSQL_OPEN_SKIP_TEMPORARY)) + ! 
(flags & MYSQL_OPEN_SKIP_TEMPORARY) && thd->have_temporary_tables()) { - for (table= thd->temporary_tables; table ; table=table->next) - { - if (table->s->table_cache_key.length == key_length + - TMP_TABLE_KEY_EXTRA && - !memcmp(table->s->table_cache_key.str, key, - key_length + TMP_TABLE_KEY_EXTRA)) + if ((table= find_temporary_table(thd, key, + key_length + TMP_TABLE_KEY_EXTRA))) + { + /* + Check if we're trying to use the same temporary table twice in a query. + Right now we don't support this because a temporary table + is always represented by only one TABLE object in THD, and + it can not be cloned. Emit an error for an unsupported behaviour. + */ + if (table->query_id) { - /* - We're trying to use the same temporary table twice in a query. - Right now we don't support this because a temporary table - is always represented by only one TABLE object in THD, and - it can not be cloned. Emit an error for an unsupported behaviour. - */ - if (table->query_id) - { - DBUG_PRINT("error", - ("query_id: %lu server_id: %u pseudo_thread_id: %lu", - (ulong) table->query_id, (uint) thd->variables.server_id, - (ulong) thd->variables.pseudo_thread_id)); - my_error(ER_CANT_REOPEN_TABLE, MYF(0), table->alias.c_ptr()); - DBUG_RETURN(TRUE); - } - table->query_id= thd->query_id; - thd->thread_specific_used= TRUE; - DBUG_PRINT("info",("Using temporary table")); - goto reset; + DBUG_PRINT("error", + ("query_id: %lu server_id: %u pseudo_thread_id: %lu", + (ulong) table->query_id, (uint) thd->variables.server_id, + (ulong) thd->variables.pseudo_thread_id)); + my_error(ER_CANT_REOPEN_TABLE, MYF(0), table->alias.c_ptr()); + DBUG_RETURN(TRUE); } + table->query_id= thd->query_id; + thd->thread_specific_used= TRUE; + DBUG_PRINT("info",("Using temporary table")); + goto reset; } } @@ -5987,14 +6035,18 @@ TABLE *open_table_uncached(THD *thd, handlerton *hton, if (add_to_temporary_tables_list) { + thd->lock_temporary_tables(); /* growing temp list at the head */ tmp_table->next= 
thd->temporary_tables; if (tmp_table->next) tmp_table->next->prev= tmp_table; thd->temporary_tables= tmp_table; thd->temporary_tables->prev= 0; - if (thd->slave_thread) - slave_open_temp_tables++; + if (thd->rgi_slave) + { + thread_safe_increment32(&slave_open_temp_tables, &thread_running_lock); + } + thd->unlock_temporary_tables(); } tmp_table->pos_in_table_list= 0; DBUG_PRINT("tmptable", ("opened table: '%s'.'%s' 0x%lx", tmp_table->s->db.str, diff --git a/sql/sql_binlog.cc b/sql/sql_binlog.cc index 4f41b942345..9bcfe64cf2d 100644 --- a/sql/sql_binlog.cc +++ b/sql/sql_binlog.cc @@ -99,6 +99,7 @@ void mysql_client_binlog_statement(THD* thd) } if (!(rgi= thd->rgi_fake)) rgi= thd->rgi_fake= new rpl_group_info(rli); + rgi->thd= thd; const char *error= 0; char *buf= (char *) my_malloc(decoded_len, MYF(MY_WME)); @@ -115,7 +116,7 @@ void mysql_client_binlog_statement(THD* thd) goto end; } - rli->sql_thd= thd; + rli->sql_driver_thd= thd; rli->no_storage= TRUE; for (char const *strptr= thd->lex->comment.str ; @@ -200,8 +201,6 @@ void mysql_client_binlog_statement(THD* thd) } } - rgi->rli= rli; - rgi->thd= thd; ev= Log_event::read_log_event(bufptr, event_len, &error, rli->relay_log.description_event_for_exec, 0); diff --git a/sql/sql_class.cc b/sql/sql_class.cc index 714adfba8f7..f424e34969d 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -5597,6 +5597,24 @@ THD::signal_wakeup_ready() } +void THD::rgi_lock_temporary_tables() +{ + mysql_mutex_lock(&rgi_slave->rli->data_lock); + temporary_tables= rgi_slave->rli->save_temporary_tables; +} + +void THD::rgi_unlock_temporary_tables() +{ + rgi_slave->rli->save_temporary_tables= temporary_tables; + mysql_mutex_unlock(&rgi_slave->rli->data_lock); +} + +bool THD::rgi_have_temporary_tables() +{ + return rgi_slave->rli->save_temporary_tables != 0; +} + + wait_for_commit::wait_for_commit() : subsequent_commits_list(0), next_subsequent_commit(0), waitee(0), opaque_pointer(0), diff --git a/sql/sql_class.h b/sql/sql_class.h index 
c34c100171d..01121fd5b35 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -3371,6 +3371,27 @@ private: bool wakeup_ready; mysql_mutex_t LOCK_wakeup_ready; mysql_cond_t COND_wakeup_ready; + + /* Protect against add/delete of temporary tables in parallel replication */ + void rgi_lock_temporary_tables(); + void rgi_unlock_temporary_tables(); + bool rgi_have_temporary_tables(); +public: + inline void lock_temporary_tables() + { + if (rgi_slave) + rgi_lock_temporary_tables(); + } + inline void unlock_temporary_tables() + { + if (rgi_slave) + rgi_unlock_temporary_tables(); + } + inline bool have_temporary_tables() + { + return (temporary_tables || + (rgi_slave && rgi_have_temporary_tables())); + } }; diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc index 3a499145a63..495f0d591dd 100644 --- a/sql/sql_parse.cc +++ b/sql/sql_parse.cc @@ -3955,6 +3955,7 @@ end_with_restore_list: break; case SQLCOM_BEGIN: + DBUG_PRINT("info", ("Executing SQLCOM_BEGIN thd: %p", thd)); if (trans_begin(thd, lex->start_transaction_opt)) goto error; my_ok(thd); diff --git a/sql/transaction.cc b/sql/transaction.cc index 1623cd57d77..3117cd7d166 100644 --- a/sql/transaction.cc +++ b/sql/transaction.cc @@ -139,6 +139,11 @@ bool trans_begin(THD *thd, uint flags) } thd->variables.option_bits&= ~(OPTION_BEGIN | OPTION_KEEP_LOG); + + /* + The following set should not be needed as the flag should always be 0 + when we come here. We should at some point change this to an assert. + */ thd->transaction.all.modified_non_trans_table= FALSE; if (res) From 2842f6b5dc254c82aa3dc976cd5bd3645dc82a60 Mon Sep 17 00:00:00 2001 From: unknown Date: Mon, 14 Oct 2013 15:28:16 +0200 Subject: [PATCH 28/41] MDEV-4506: Parallel replication: error handling. Add an error code to the wait_for_commit facility. Now, when a transaction fails, it can signal the error to any subsequent transaction that is waiting for it to commit. 
The waiting transactions then receive the error code back from wait_for_prior_commit() and can handle the error appropriately. Also fix one race that could cause crash if @@slave_parallel_threads were changed several times quickly in succession. --- include/mysql/plugin.h | 2 +- include/mysql/plugin_audit.h.pp | 2 +- include/mysql/plugin_auth.h.pp | 2 +- include/mysql/plugin_ftparser.h.pp | 2 +- sql/handler.cc | 11 ++++----- sql/log.cc | 9 ++++---- sql/rpl_parallel.cc | 15 +++++++++---- sql/share/errmsg-utf8.txt | 2 ++ sql/sql_class.cc | 18 +++++++++------ sql/sql_class.h | 32 ++++++++++++++++++--------- storage/innobase/handler/ha_innodb.cc | 2 +- storage/xtradb/handler/ha_innodb.cc | 2 +- 12 files changed, 62 insertions(+), 37 deletions(-) diff --git a/include/mysql/plugin.h b/include/mysql/plugin.h index ab72a9d106b..9ac63f08f73 100644 --- a/include/mysql/plugin.h +++ b/include/mysql/plugin.h @@ -716,7 +716,7 @@ void thd_set_ha_data(MYSQL_THD thd, const struct handlerton *hton, thd_wakeup_subsequent_commits() is only needed when no transaction coordinator is used, meaning a single storage engine and no binary log. 
*/ -void thd_wakeup_subsequent_commits(MYSQL_THD thd); +void thd_wakeup_subsequent_commits(MYSQL_THD thd, int wakeup_error); #ifdef __cplusplus } diff --git a/include/mysql/plugin_audit.h.pp b/include/mysql/plugin_audit.h.pp index 564dd6272f5..17e5c191672 100644 --- a/include/mysql/plugin_audit.h.pp +++ b/include/mysql/plugin_audit.h.pp @@ -236,7 +236,7 @@ void mysql_query_cache_invalidate4(void* thd, void *thd_get_ha_data(const void* thd, const struct handlerton *hton); void thd_set_ha_data(void* thd, const struct handlerton *hton, const void *ha_data); -void thd_wakeup_subsequent_commits(void* thd); +void thd_wakeup_subsequent_commits(void* thd, int wakeup_error); struct mysql_event_general { unsigned int event_subclass; diff --git a/include/mysql/plugin_auth.h.pp b/include/mysql/plugin_auth.h.pp index edfd7095203..33b552ec75b 100644 --- a/include/mysql/plugin_auth.h.pp +++ b/include/mysql/plugin_auth.h.pp @@ -236,7 +236,7 @@ void mysql_query_cache_invalidate4(void* thd, void *thd_get_ha_data(const void* thd, const struct handlerton *hton); void thd_set_ha_data(void* thd, const struct handlerton *hton, const void *ha_data); -void thd_wakeup_subsequent_commits(void* thd); +void thd_wakeup_subsequent_commits(void* thd, int wakeup_error); #include typedef struct st_plugin_vio_info { diff --git a/include/mysql/plugin_ftparser.h.pp b/include/mysql/plugin_ftparser.h.pp index 0cc51e259dc..b4aa962c51c 100644 --- a/include/mysql/plugin_ftparser.h.pp +++ b/include/mysql/plugin_ftparser.h.pp @@ -189,7 +189,7 @@ void mysql_query_cache_invalidate4(void* thd, void *thd_get_ha_data(const void* thd, const struct handlerton *hton); void thd_set_ha_data(void* thd, const struct handlerton *hton, const void *ha_data); -void thd_wakeup_subsequent_commits(void* thd); +void thd_wakeup_subsequent_commits(void* thd, int wakeup_error); enum enum_ftparser_mode { MYSQL_FTPARSER_SIMPLE_MODE= 0, diff --git a/sql/handler.cc b/sql/handler.cc index c42204b27d1..672e1cb4e42 100644 --- 
a/sql/handler.cc +++ b/sql/handler.cc @@ -1458,10 +1458,11 @@ int ha_commit_one_phase(THD *thd, bool all) transaction.all.ha_list, see why in trans_register_ha()). */ bool is_real_trans=all || thd->transaction.all.ha_list == 0; + int res; DBUG_ENTER("ha_commit_one_phase"); - if (is_real_trans) - thd->wait_for_prior_commit(); - int res= commit_one_phase_2(thd, all, trans, is_real_trans); + if (is_real_trans && (res= thd->wait_for_prior_commit())) + DBUG_RETURN(res); + res= commit_one_phase_2(thd, all, trans, is_real_trans); DBUG_RETURN(res); } @@ -1501,7 +1502,7 @@ commit_one_phase_2(THD *thd, bool all, THD_TRANS *trans, bool is_real_trans) /* Free resources and perform other cleanup even for 'empty' transactions. */ if (is_real_trans) { - thd->wakeup_subsequent_commits(); + thd->wakeup_subsequent_commits(error); thd->transaction.cleanup(); } @@ -1579,7 +1580,7 @@ int ha_rollback_trans(THD *thd, bool all) /* Always cleanup. Even if nht==0. There may be savepoints. */ if (is_real_trans) { - thd->wakeup_subsequent_commits(); + thd->wakeup_subsequent_commits(error); thd->transaction.cleanup(); } if (all) diff --git a/sql/log.cc b/sql/log.cc index dd6eeb3678c..95091875d83 100644 --- a/sql/log.cc +++ b/sql/log.cc @@ -6743,7 +6743,7 @@ MYSQL_BIN_LOG::queue_for_group_commit(group_commit_entry *orig_entry) cur->wakeup_subsequent_commits_running= true; mysql_mutex_unlock(&cur->LOCK_wait_commit); } - waiter->wakeup(); + waiter->wakeup(0); } waiter= next; } @@ -6849,7 +6849,7 @@ MYSQL_BIN_LOG::write_transaction_to_binlog_events(group_commit_entry *entry) field. 
*/ if (next->queued_by_other) - next->thd->wait_for_commit_ptr->wakeup(); + next->thd->wait_for_commit_ptr->wakeup(entry->error); else next->thd->signal_wakeup_ready(); } @@ -7145,7 +7145,7 @@ MYSQL_BIN_LOG::trx_group_commit_leader(group_commit_entry *leader) if (current != leader) // Don't wake up ourself { if (current->queued_by_other) - current->thd->wait_for_commit_ptr->wakeup(); + current->thd->wait_for_commit_ptr->wakeup(current->error); else current->thd->signal_wakeup_ready(); } @@ -7844,7 +7844,8 @@ int TC_LOG_MMAP::log_and_order(THD *thd, my_xid xid, bool all, mysql_mutex_unlock(&LOCK_prepare_ordered); } - thd->wait_for_prior_commit(); + if (thd->wait_for_prior_commit()) + return 0; cookie= 0; if (xid) diff --git a/sql/rpl_parallel.cc b/sql/rpl_parallel.cc index c10a035c599..c6411b01e60 100644 --- a/sql/rpl_parallel.cc +++ b/sql/rpl_parallel.cc @@ -52,7 +52,7 @@ struct rpl_parallel_thread_pool global_rpl_thread_pool; -static void +static int rpt_handle_event(rpl_parallel_thread::queued_event *qev, struct rpl_parallel_thread *rpt) { @@ -70,6 +70,7 @@ rpt_handle_event(rpl_parallel_thread::queued_event *qev, err= apply_event_and_update_pos(qev->ev, thd, rgi, rpt); thd->rgi_slave= NULL; /* ToDo: error handling. */ + return err; } @@ -104,6 +105,7 @@ handle_rpl_parallel_thread(void *arg) bool group_standalone= true; bool in_event_group= false; uint64 event_gtid_sub_id= 0; + int err; struct rpl_parallel_thread *rpt= (struct rpl_parallel_thread *)arg; @@ -139,6 +141,7 @@ handle_rpl_parallel_thread(void *arg) mysql_cond_wait(&rpt->COND_rpl_thread, &rpt->LOCK_rpl_thread); rpt->running= true; + mysql_cond_signal(&rpt->COND_rpl_thread); while (!rpt->stop && !thd->killed) { @@ -163,6 +166,7 @@ handle_rpl_parallel_thread(void *arg) uint64 wait_start_sub_id; bool end_of_group; + err= 0; /* Handle a new event group, which will be initiated by a GTID event. 
*/ if (event_type == GTID_EVENT) { @@ -221,9 +225,9 @@ handle_rpl_parallel_thread(void *arg) everything is stopped and cleaned up correctly. */ if (!sql_worker_killed(thd, rgi, in_event_group)) - rpt_handle_event(events, rpt); + err= rpt_handle_event(events, rpt); else - thd->wait_for_prior_commit(); + err= thd->wait_for_prior_commit(); end_of_group= in_event_group && @@ -272,7 +276,7 @@ handle_rpl_parallel_thread(void *arg) } mysql_mutex_unlock(&entry->LOCK_parallel_entry); - rgi->commit_orderer.wakeup_subsequent_commits(); + rgi->commit_orderer.wakeup_subsequent_commits(err); delete rgi; } @@ -431,6 +435,9 @@ rpl_parallel_change_thread_count(rpl_parallel_thread_pool *pool, mysql_mutex_lock(&pool->threads[i]->LOCK_rpl_thread); pool->threads[i]->delay_start= false; mysql_cond_signal(&pool->threads[i]->COND_rpl_thread); + while (!pool->threads[i]->running) + mysql_cond_wait(&pool->threads[i]->COND_rpl_thread, + &pool->threads[i]->LOCK_rpl_thread); mysql_mutex_unlock(&pool->threads[i]->LOCK_rpl_thread); } diff --git a/sql/share/errmsg-utf8.txt b/sql/share/errmsg-utf8.txt index 85baddd3c49..51bc7eaf93d 100644 --- a/sql/share/errmsg-utf8.txt +++ b/sql/share/errmsg-utf8.txt @@ -6557,3 +6557,5 @@ ER_STORED_FUNCTION_PREVENTS_SWITCH_GTID_DOMAIN_ID_SEQ_NO eng "Cannot modify @@session.gtid_domain_id or @@session.gtid_seq_no inside a stored function or trigger" ER_CHANGE_SLAVE_PARALLEL_THREADS_ACTIVE eng "Cannot change @@slave_parallel_threads while another change is in progress" +ER_PRIOR_COMMIT_FAILED + eng "Commit failed due to failure of an earlier commit on which this one depends" diff --git a/sql/sql_class.cc b/sql/sql_class.cc index f424e34969d..65fed2e8f98 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -610,9 +610,9 @@ void thd_set_ha_data(THD *thd, const struct handlerton *hton, @see thd_wakeup_subsequent_commits() definition in plugin.h */ extern "C" -void thd_wakeup_subsequent_commits(THD *thd) +void thd_wakeup_subsequent_commits(THD *thd, int 
wakeup_error) { - thd->wakeup_subsequent_commits(); + thd->wakeup_subsequent_commits(wakeup_error); } @@ -5618,7 +5618,8 @@ bool THD::rgi_have_temporary_tables() wait_for_commit::wait_for_commit() : subsequent_commits_list(0), next_subsequent_commit(0), waitee(0), opaque_pointer(0), - waiting_for_commit(false), wakeup_subsequent_commits_running(false) + waiting_for_commit(false), wakeup_error(0), + wakeup_subsequent_commits_running(false) { mysql_mutex_init(key_LOCK_wait_commit, &LOCK_wait_commit, MY_MUTEX_INIT_FAST); mysql_cond_init(key_COND_wait_commit, &COND_wait_commit, 0); @@ -5633,7 +5634,7 @@ wait_for_commit::~wait_for_commit() void -wait_for_commit::wakeup() +wait_for_commit::wakeup(int wakeup_error) { /* We signal each waiter on their own condition and mutex (rather than using @@ -5649,6 +5650,7 @@ wait_for_commit::wakeup() */ mysql_mutex_lock(&LOCK_wait_commit); waiting_for_commit= false; + this->wakeup_error= wakeup_error; mysql_mutex_unlock(&LOCK_wait_commit); mysql_cond_signal(&COND_wait_commit); } @@ -5675,6 +5677,7 @@ void wait_for_commit::register_wait_for_prior_commit(wait_for_commit *waitee) { waiting_for_commit= true; + wakeup_error= 0; DBUG_ASSERT(!this->waitee /* No prior registration allowed */); this->waitee= waitee; @@ -5704,7 +5707,7 @@ wait_for_commit::register_wait_for_prior_commit(wait_for_commit *waitee) with register_wait_for_prior_commit(). If the commit already completed, returns immediately. 
*/ -void +int wait_for_commit::wait_for_prior_commit2() { mysql_mutex_lock(&LOCK_wait_commit); @@ -5712,6 +5715,7 @@ wait_for_commit::wait_for_prior_commit2() mysql_cond_wait(&COND_wait_commit, &LOCK_wait_commit); mysql_mutex_unlock(&LOCK_wait_commit); waitee= NULL; + return wakeup_error; } @@ -5755,7 +5759,7 @@ wait_for_commit::wait_for_prior_commit2() */ void -wait_for_commit::wakeup_subsequent_commits2() +wait_for_commit::wakeup_subsequent_commits2(int wakeup_error) { wait_for_commit *waiter; @@ -5772,7 +5776,7 @@ wait_for_commit::wakeup_subsequent_commits2() once the wakeup is done, the field could be invalidated at any time. */ wait_for_commit *next= waiter->next_subsequent_commit; - waiter->wakeup(); + waiter->wakeup(wakeup_error); waiter= next; } diff --git a/sql/sql_class.h b/sql/sql_class.h index 01121fd5b35..567eaf5c351 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -1614,6 +1614,8 @@ struct wait_for_commit cleared. */ bool waiting_for_commit; + /* The wakeup error code from the waitee. 0 means no error. */ + int wakeup_error; /* Flag set when wakeup_subsequent_commits_running() is active, see comments on that function for details. @@ -1621,16 +1623,18 @@ struct wait_for_commit bool wakeup_subsequent_commits_running; void register_wait_for_prior_commit(wait_for_commit *waitee); - void wait_for_prior_commit() + int wait_for_prior_commit() { /* Quick inline check, to avoid function call and locking in the common case where no wakeup is registered, or a registered wait was already signalled. */ if (waiting_for_commit) - wait_for_prior_commit2(); + return wait_for_prior_commit2(); + else + return wakeup_error; } - void wakeup_subsequent_commits() + void wakeup_subsequent_commits(int wakeup_error) { /* Do the check inline, so only the wakeup case takes the cost of a function @@ -1645,7 +1649,7 @@ struct wait_for_commit prevent a waiter from arriving just after releasing the lock. 
*/ if (subsequent_commits_list) - wakeup_subsequent_commits2(); + wakeup_subsequent_commits2(wakeup_error); } void unregister_wait_for_prior_commit() { @@ -1653,10 +1657,10 @@ struct wait_for_commit unregister_wait_for_prior_commit2(); } - void wakeup(); + void wakeup(int wakeup_error); - void wait_for_prior_commit2(); - void wakeup_subsequent_commits2(); + int wait_for_prior_commit2(); + void wakeup_subsequent_commits2(int wakeup_error); void unregister_wait_for_prior_commit2(); wait_for_commit(); @@ -3308,15 +3312,21 @@ public: void signal_wakeup_ready(); wait_for_commit *wait_for_commit_ptr; - void wait_for_prior_commit() + int wait_for_prior_commit() { if (wait_for_commit_ptr) - wait_for_commit_ptr->wait_for_prior_commit(); + { + int err= wait_for_commit_ptr->wait_for_prior_commit(); + if (err) + my_error(ER_PRIOR_COMMIT_FAILED, MYF(0)); + return err; + } + return 0; } - void wakeup_subsequent_commits() + void wakeup_subsequent_commits(int wakeup_error) { if (wait_for_commit_ptr) - wait_for_commit_ptr->wakeup_subsequent_commits(); + wait_for_commit_ptr->wakeup_subsequent_commits(wakeup_error); } private: diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 4d4bb7bd1f3..f32037244c5 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -2927,7 +2927,7 @@ innobase_commit( /* At this point commit order is fixed and transaction is visible to others. So we can wakeup other commits waiting for this one, to allow then to group commit with us. */ - thd_wakeup_subsequent_commits(thd); + thd_wakeup_subsequent_commits(thd, 0); /* We did the first part already in innobase_commit_ordered(), Now finish by doing a write + flush of logs. 
*/ diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc index e80810d3948..2efcb15cba6 100644 --- a/storage/xtradb/handler/ha_innodb.cc +++ b/storage/xtradb/handler/ha_innodb.cc @@ -3588,7 +3588,7 @@ innobase_commit( /* At this point commit order is fixed and transaction is visible to others. So we can wakeup other commits waiting for this one, to allow then to group commit with us. */ - thd_wakeup_subsequent_commits(thd); + thd_wakeup_subsequent_commits(thd, 0); /* We did the first part already in innobase_commit_ordered(), Now finish by doing a write + flush of logs. */ From 5748eb3ec63adee035ec5305796bdbc67a654158 Mon Sep 17 00:00:00 2001 From: Michael Widenius Date: Tue, 15 Oct 2013 00:17:16 +0300 Subject: [PATCH 29/41] Moved the remaining variables, that depends on sql execution, from Relay_log_info to rpl_group_info: -row_stmt_start_timestamp -last_event_start_time -long_find_row_note -trans_retries Added slave_executed_entries_lock to protect rli->executed_entries Added primitives for thread safe 64 bit increment Update rli->executed_entries when event has executed, not when event has been sent to sql execution thread sql/log_event.cc: row_stmt_start and long_find_row_note is now in rpl_group_info sql/mysqld.cc: Added slave_executed_entries_lock to protect rli->executed_entries sql/mysqld.h: Added slave_executed_entries_lock to protect rli->executed_entries Added primitives for thread safe 64 bit increment sql/rpl_parallel.cc: Update rli->executed_entries when event has executed, not when event has been sent to sql execution thread sql/rpl_rli.cc: Moved row_stmt_start_timestamp, last_event_start_time and long_find_row_note from Relay_log_info to rpl_group_info sql/rpl_rli.h: Moved trans_retries, row_stmt_start_timestamp, last_event_start_time and long_find_row_note from Relay_log_info to rpl_group_info sql/slave.cc: Use rgi for trans_retries and last_event_start_time Update rli->executed_entries when event has executed, not 
when event has been sent to sql execution thread Reset trans_retries when object is created --- sql/log_event.cc | 12 ++--- sql/log_event_old.cc | 2 +- sql/mysqld.cc | 3 ++ sql/mysqld.h | 15 ++++++ sql/rpl_parallel.cc | 26 ++++++---- sql/rpl_rli.cc | 10 ++-- sql/rpl_rli.h | 119 +++++++++++++++++++++++-------------------- sql/slave.cc | 43 ++++++++-------- 8 files changed, 134 insertions(+), 96 deletions(-) diff --git a/sql/log_event.cc b/sql/log_event.cc index 59fc856c3f2..55166b65df4 100644 --- a/sql/log_event.cc +++ b/sql/log_event.cc @@ -9293,7 +9293,7 @@ int Rows_log_event::do_apply_event(rpl_group_info *rgi) set the initial time of this ROWS statement if it was not done before in some other ROWS event. */ - const_cast(rli)->set_row_stmt_start_timestamp(); + rgi->set_row_stmt_start_timestamp(); while (error == 0 && m_curr_row < m_rows_end) { @@ -11133,13 +11133,13 @@ static inline void issue_long_find_row_warning(Log_event_type type, const char *table_name, bool is_index_scan, - const Relay_log_info *rli) + rpl_group_info *rgi) { if ((global_system_variables.log_warnings > 1 && - !const_cast(rli)->is_long_find_row_note_printed())) + !rgi->is_long_find_row_note_printed())) { time_t now= my_time(0); - time_t stmt_ts= const_cast(rli)->get_row_stmt_start_timestamp(); + time_t stmt_ts= rgi->get_row_stmt_start_timestamp(); DBUG_EXECUTE_IF("inject_long_find_row_note", stmt_ts-=(LONG_FIND_ROW_THRESHOLD*2);); @@ -11148,7 +11148,7 @@ void issue_long_find_row_warning(Log_event_type type, if (delta > LONG_FIND_ROW_THRESHOLD) { - const_cast(rli)->set_long_find_row_note_printed(); + rgi->set_long_find_row_note_printed(); const char* evt_type= type == DELETE_ROWS_EVENT ? " DELETE" : "n UPDATE"; const char* scan_type= is_index_scan ? 
"scanning an index" : "scanning the table"; @@ -11477,7 +11477,7 @@ int Rows_log_event::find_row(rpl_group_info *rgi) end: if (is_table_scan || is_index_scan) issue_long_find_row_warning(get_type_code(), m_table->alias.c_ptr(), - is_index_scan, rgi->rli); + is_index_scan, rgi); table->default_column_bitmaps(); DBUG_RETURN(error); } diff --git a/sql/log_event_old.cc b/sql/log_event_old.cc index b4f28abcf2b..174219a8e72 100644 --- a/sql/log_event_old.cc +++ b/sql/log_event_old.cc @@ -1740,7 +1740,7 @@ int Old_rows_log_event::do_apply_event(rpl_group_info *rgi) problem. When WL#2975 is implemented, just remove the member Relay_log_info::last_event_start_time and all its occurrences. */ - const_cast(rli)->last_event_start_time= my_time(0); + rgi->last_event_start_time= my_time(0); } if (get_flags(STMT_END_F)) diff --git a/sql/mysqld.cc b/sql/mysqld.cc index 9f1d9e48b1c..1e7deef8d89 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -492,6 +492,7 @@ my_atomic_rwlock_t global_query_id_lock; my_atomic_rwlock_t thread_running_lock; my_atomic_rwlock_t thread_count_lock; my_atomic_rwlock_t statistics_lock; +my_atomic_rwlock_t slave_executed_entries_lock; ulong aborted_threads, aborted_connects; ulong delayed_insert_timeout, delayed_insert_limit, delayed_queue_size; ulong delayed_insert_threads, delayed_insert_writes, delayed_rows_in_use; @@ -1939,6 +1940,7 @@ void clean_up(bool print_message) my_atomic_rwlock_destroy(&thread_running_lock); my_atomic_rwlock_destroy(&thread_count_lock); my_atomic_rwlock_destroy(&statistics_lock); + my_atomic_rwlock_destroy(&slave_executed_entries_lock); free_charsets(); mysql_mutex_lock(&LOCK_thread_count); DBUG_PRINT("quit", ("got thread count lock")); @@ -7550,6 +7552,7 @@ static int mysql_init_variables(void) my_atomic_rwlock_init(&thread_running_lock); my_atomic_rwlock_init(&thread_count_lock); my_atomic_rwlock_init(&statistics_lock); + my_atomic_rwlock_init(slave_executed_entries_lock); strmov(server_version, MYSQL_SERVER_VERSION); 
threads.empty(); thread_cache.empty(); diff --git a/sql/mysqld.h b/sql/mysqld.h index 0bd3687f4fb..e45b48f0332 100644 --- a/sql/mysqld.h +++ b/sql/mysqld.h @@ -367,6 +367,7 @@ extern mysql_cond_t COND_manager; extern int32 thread_running; extern int32 thread_count; extern my_atomic_rwlock_t thread_running_lock, thread_count_lock; +extern my_atomic_rwlock_t slave_executed_entries_lock; extern char *opt_ssl_ca, *opt_ssl_capath, *opt_ssl_cert, *opt_ssl_cipher, *opt_ssl_key; @@ -507,6 +508,20 @@ inline void thread_safe_decrement32(int32 *value, my_atomic_rwlock_t *lock) my_atomic_rwlock_wrunlock(lock); } +inline void thread_safe_increment64(int64 *value, my_atomic_rwlock_t *lock) +{ + my_atomic_rwlock_wrlock(lock); + (void) my_atomic_add64(value, 1); + my_atomic_rwlock_wrunlock(lock); +} + +inline void thread_safe_decrement64(int64 *value, my_atomic_rwlock_t *lock) +{ + my_atomic_rwlock_wrlock(lock); + (void) my_atomic_add64(value, -1); + my_atomic_rwlock_wrunlock(lock); +} + inline void inc_thread_running() { diff --git a/sql/rpl_parallel.cc b/sql/rpl_parallel.cc index c6411b01e60..19ae2a35339 100644 --- a/sql/rpl_parallel.cc +++ b/sql/rpl_parallel.cc @@ -9,10 +9,6 @@ ToDo list: - - Review every field in Relay_log_info, and all code that accesses it. - Split out the necessary parts into rpl_group_info, to avoid conflicts - between parallel execution of events. (Such as deferred events ...) - - Error handling. If we fail in one of multiple parallel executions, we need to make a best effort to complete prior transactions and roll back following transactions, so slave binlog position will be correct. @@ -43,10 +39,11 @@ slave rolls back the transaction; parallel execution needs to be able to deal with this wrt. commit_orderer and such. - - We should fail if we connect to the master with opt_slave_parallel_threads - greater than zero and master does not support GTID. 
Just to avoid a bunch - of potential problems, we won't be able to do any parallel replication - in this case anyway. + - We should notice if the master doesn't support GTID, and then run in + single threaded mode against that master. This is needed to be able to + support multi-master-replication with old and new masters. + + - Retry of failed transactions is not yet implemented for the parallel case. */ struct rpl_parallel_thread_pool global_rpl_thread_pool; @@ -56,7 +53,7 @@ static int rpt_handle_event(rpl_parallel_thread::queued_event *qev, struct rpl_parallel_thread *rpt) { - int err; + int err __attribute__((unused)); rpl_group_info *rgi= qev->rgi; Relay_log_info *rli= rgi->rli; THD *thd= rgi->thd; @@ -69,6 +66,9 @@ rpt_handle_event(rpl_parallel_thread::queued_event *qev, qev->ev->thd= thd; err= apply_event_and_update_pos(qev->ev, thd, rgi, rpt); thd->rgi_slave= NULL; + + thread_safe_increment64(&rli->executed_entries, + &slave_executed_entries_lock); /* ToDo: error handling. */ return err; } @@ -617,7 +617,10 @@ rpl_parallel::wait_for_done() /* do_event() is executed by the sql_driver_thd thread. - It's main purpose is to find a thread that can exectue the query. + It's main purpose is to find a thread that can execute the query. 
+ + @retval false ok, event was accepted + @retval true error */ bool @@ -643,7 +646,10 @@ rpl_parallel::do_event(rpl_group_info *serial_rgi, Log_event *ev) rli->abort_slave) sql_thread_stopping= true; if (sql_thread_stopping) + { + /* QQ: Need a better comment why we return false here */ return false; + } if (!(qev= (rpl_parallel_thread::queued_event *)my_malloc(sizeof(*qev), MYF(0)))) diff --git a/sql/rpl_rli.cc b/sql/rpl_rli.cc index ae2b7558285..53481d2efaf 100644 --- a/sql/rpl_rli.cc +++ b/sql/rpl_rli.cc @@ -59,8 +59,7 @@ Relay_log_info::Relay_log_info(bool is_slave_recovery) abort_pos_wait(0), slave_run_id(0), sql_driver_thd(), inited(0), abort_slave(0), slave_running(0), until_condition(UNTIL_NONE), until_log_pos(0), retried_trans(0), executed_entries(0), - last_event_start_time(0), m_flags(0), - row_stmt_start_timestamp(0), long_find_row_note_printed(false) + m_flags(0) { DBUG_ENTER("Relay_log_info::Relay_log_info"); @@ -1420,7 +1419,8 @@ rpl_group_info::rpl_group_info(Relay_log_info *rli_) : rli(rli_), thd(0), gtid_sub_id(0), wait_commit_sub_id(0), wait_commit_group_info(0), wait_start_sub_id(0), parallel_entry(0), deferred_events(NULL), m_annotate_event(0), tables_to_lock(0), - tables_to_lock_count(0) + tables_to_lock_count(0), trans_retries(0), last_event_start_time(0), + row_stmt_start_timestamp(0), long_find_row_note_printed(false) { bzero(&current_gtid, sizeof(current_gtid)); mysql_mutex_init(key_rpl_group_info_sleep_lock, &sleep_lock, @@ -1551,8 +1551,8 @@ void rpl_group_info::cleanup_context(THD *thd, bool error) - timestamp - flag that decides whether the slave prints or not */ - rli->reset_row_stmt_start_timestamp(); - rli->unset_long_find_row_note_printed(); + reset_row_stmt_start_timestamp(); + unset_long_find_row_note_printed(); DBUG_VOID_RETURN; } diff --git a/sql/rpl_rli.h b/sql/rpl_rli.h index 9e96fb8e72c..68cd051be2a 100644 --- a/sql/rpl_rli.h +++ b/sql/rpl_rli.h @@ -298,14 +298,16 @@ public: char cached_charset[6]; /* - trans_retries varies
between 0 to slave_transaction_retries and counts how - many times the slave has retried the present transaction; gets reset to 0 - when the transaction finally succeeds. retried_trans is a cumulative - counter: how many times the slave has retried a transaction (any) since - slave started. + retried_trans is a cumulative counter: how many times the slave + has retried a transaction (any) since slave started. + Protected by data_lock. */ - ulong trans_retries, retried_trans; - ulong executed_entries; /* For SLAVE STATUS */ + ulong retried_trans; + /* + Number of executed events for SLAVE STATUS. + Protected by slave_executed_entries_lock + */ + int64 executed_entries; /* If the end of the hot relay log is made of master's events ignored by the @@ -381,13 +383,6 @@ public: void cached_charset_invalidate(); bool cached_charset_compare(char *charset) const; - /* - Used to defer stopping the SQL thread to give it a chance - to finish up the current group of events. - The timestamp is set and reset in @c sql_slave_killed(). - */ - time_t last_event_start_time; - /** Helper function to do after statement completion. @@ -462,39 +457,6 @@ public: m_flags&= ~flag; } - time_t get_row_stmt_start_timestamp() - { - return row_stmt_start_timestamp; - } - - time_t set_row_stmt_start_timestamp() - { - if (row_stmt_start_timestamp == 0) - row_stmt_start_timestamp= my_time(0); - - return row_stmt_start_timestamp; - } - - void reset_row_stmt_start_timestamp() - { - row_stmt_start_timestamp= 0; - } - - void set_long_find_row_note_printed() - { - long_find_row_note_printed= true; - } - - void unset_long_find_row_note_printed() - { - long_find_row_note_printed= false; - } - - bool is_long_find_row_note_printed() - { - return long_find_row_note_printed; - } - private: /* @@ -504,13 +466,6 @@ private: relay log. */ uint32 m_flags; - - /* - Runtime state for printing a note when slave is taking - too long while processing a row event. 
- */ - time_t row_stmt_start_timestamp; - bool long_find_row_note_printed; }; @@ -592,6 +547,29 @@ struct rpl_group_info mysql_mutex_t sleep_lock; mysql_cond_t sleep_cond; + /* + trans_retries varies between 0 to slave_transaction_retries and counts how + many times the slave has retried the present transaction; gets reset to 0 + when the transaction finally succeeds. + */ + ulong trans_retries; + + /* + Used to defer stopping the SQL thread to give it a chance + to finish up the current group of events. + The timestamp is set and reset in @c sql_slave_killed(). + */ + time_t last_event_start_time; + +private: + /* + Runtime state for printing a note when slave is taking + too long while processing a row event. + */ + time_t row_stmt_start_timestamp; + bool long_find_row_note_printed; +public: + rpl_group_info(Relay_log_info *rli_); ~rpl_group_info(); @@ -673,6 +651,39 @@ struct rpl_group_info void clear_tables_to_lock(); void cleanup_context(THD *, bool); void slave_close_thread_tables(THD *); + + time_t get_row_stmt_start_timestamp() + { + return row_stmt_start_timestamp; + } + + time_t set_row_stmt_start_timestamp() + { + if (row_stmt_start_timestamp == 0) + row_stmt_start_timestamp= my_time(0); + + return row_stmt_start_timestamp; + } + + void reset_row_stmt_start_timestamp() + { + row_stmt_start_timestamp= 0; + } + + void set_long_find_row_note_printed() + { + long_find_row_note_printed= true; + } + + void unset_long_find_row_note_printed() + { + long_find_row_note_printed= false; + } + + bool is_long_find_row_note_printed() + { + return long_find_row_note_printed; + } }; diff --git a/sql/slave.cc b/sql/slave.cc index 2504f723a78..61c63cd2862 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -1034,9 +1034,9 @@ static bool sql_slave_killed(rpl_group_info *rgi) @c last_event_start_time the timer. 
*/ - if (rli->last_event_start_time == 0) - rli->last_event_start_time= my_time(0); - ret= difftime(my_time(0), rli->last_event_start_time) <= + if (rgi->last_event_start_time == 0) + rgi->last_event_start_time= my_time(0); + ret= difftime(my_time(0), rgi->last_event_start_time) <= SLAVE_WAIT_GROUP_DONE ? FALSE : TRUE; DBUG_EXECUTE_IF("stop_slave_middle_group", @@ -1070,7 +1070,7 @@ static bool sql_slave_killed(rpl_group_info *rgi) } } if (ret) - rli->last_event_start_time= 0; + rgi->last_event_start_time= 0; DBUG_RETURN(ret); } @@ -3047,10 +3047,10 @@ int apply_event_and_update_pos(Log_event* ev, THD* thd, DBUG_PRINT("exec_event",("%s(type_code: %d; server_id: %d)", ev->get_type_str(), ev->get_type_code(), ev->server_id)); - DBUG_PRINT("info", ("thd->options: %s%s; rli->last_event_start_time: %lu", + DBUG_PRINT("info", ("thd->options: %s%s; rgi->last_event_start_time: %lu", FLAGSTR(thd->variables.option_bits, OPTION_NOT_AUTOCOMMIT), FLAGSTR(thd->variables.option_bits, OPTION_BEGIN), - (ulong) rli->last_event_start_time)); + (ulong) rgi->last_event_start_time)); /* Execute the event to change the database and update the binary @@ -3385,14 +3385,16 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli, Note, if lock wait timeout (innodb_lock_wait_timeout exceeded) there is no rollback since 5.0.13 (ref: manual). We have to not only seek but also - a) init_master_info(), to seek back to hot relay log's start for later - (for when we will come back to this hot log after re-processing the - possibly existing old logs where BEGIN is: check_binlog_magic() will - then need the cache to be at position 0 (see comments at beginning of + + a) init_master_info(), to seek back to hot relay log's start + for later (for when we will come back to this hot log after + re-processing the possibly existing old logs where BEGIN is: + check_binlog_magic() will then need the cache to be at + position 0 (see comments at beginning of init_master_info()). 
b) init_relay_log_pos(), because the BEGIN may be an older relay log. */ - if (rli->trans_retries < slave_trans_retries) + if (serial_rgi->trans_retries < slave_trans_retries) { if (init_master_info(rli->mi, 0, 0, 0, SLAVE_SQL)) sql_print_error("Failed to initialize the master info structure"); @@ -3407,15 +3409,17 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli, exec_res= 0; serial_rgi->cleanup_context(thd, 1); /* chance for concurrent connection to get more locks */ - slave_sleep(thd, min(rli->trans_retries, MAX_SLAVE_RETRY_PAUSE), + slave_sleep(thd, min(serial_rgi->trans_retries, + MAX_SLAVE_RETRY_PAUSE), sql_slave_killed, serial_rgi); + serial_rgi->trans_retries++; mysql_mutex_lock(&rli->data_lock); // because of SHOW STATUS - rli->trans_retries++; rli->retried_trans++; statistic_increment(slave_retried_transactions, LOCK_status); mysql_mutex_unlock(&rli->data_lock); DBUG_PRINT("info", ("Slave retries transaction " - "rli->trans_retries: %lu", rli->trans_retries)); + "rgi->trans_retries: %lu", + serial_rgi->trans_retries)); } } else @@ -3434,11 +3438,13 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli, event, the execution will proceed as usual; in the case of a non-transient error, the slave will stop with an error. 
*/ - rli->trans_retries= 0; // restart from fresh - DBUG_PRINT("info", ("Resetting retry counter, rli->trans_retries: %lu", - rli->trans_retries)); + serial_rgi->trans_retries= 0; // restart from fresh + DBUG_PRINT("info", ("Resetting retry counter, rgi->trans_retries: %lu", + serial_rgi->trans_retries)); } } + thread_safe_increment64(&rli->executed_entries, + &slave_executed_entries_lock); DBUG_RETURN(exec_res); } mysql_mutex_unlock(&rli->data_lock); @@ -4179,8 +4185,6 @@ pthread_handler_t handle_slave_sql(void *arg) mysql_mutex_lock(&rli->log_space_lock); rli->ignore_log_space_limit= 0; mysql_mutex_unlock(&rli->log_space_lock); - rli->trans_retries= 0; // start from "no error" - DBUG_PRINT("info", ("rli->trans_retries: %lu", rli->trans_retries)); if (init_relay_log_pos(rli, rli->group_relay_log_name, @@ -4406,7 +4410,6 @@ the slave SQL thread with \"SLAVE START\". We stopped at log \ } goto err; } - rli->executed_entries++; } if (opt_slave_parallel_threads > 0) From fcaf1e6a82e2a9f6914b72ea9307c7d91d194150 Mon Sep 17 00:00:00 2001 From: Michael Widenius Date: Tue, 15 Oct 2013 00:18:48 +0300 Subject: [PATCH 30/41] Flush the proc file after every modifications. 
This will avoid errors of type "Table './mysql/proc' is marked as crashed and should be repaired" --- sql/sp.cc | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/sql/sp.cc b/sql/sp.cc index 56565f1d11e..f60ca49ca7b 100644 --- a/sql/sp.cc +++ b/sql/sp.cc @@ -1167,6 +1167,9 @@ sp_create_routine(THD *thd, stored_procedure_type type, sp_head *sp) ret= SP_OK; if (table->file->ha_write_row(table->record[0])) ret= SP_WRITE_ROW_FAILED; + /* Make change permanent and avoid 'table is marked as crashed' errors */ + table->file->extra(HA_EXTRA_FLUSH); + if (ret == SP_OK) sp_cache_invalidate(); @@ -1256,6 +1259,8 @@ sp_drop_routine(THD *thd, stored_procedure_type type, sp_name *name) { if (table->file->ha_delete_row(table->record[0])) ret= SP_DELETE_ROW_FAILED; + /* Make change permanent and avoid 'table is marked as crashed' errors */ + table->file->extra(HA_EXTRA_FLUSH); } if (ret == SP_OK) @@ -1366,6 +1371,8 @@ sp_update_routine(THD *thd, stored_procedure_type type, sp_name *name, ret= SP_WRITE_ROW_FAILED; else ret= 0; + /* Make change permanent and avoid 'table is marked as crashed' errors */ + table->file->extra(HA_EXTRA_FLUSH); } if (ret == SP_OK) @@ -1540,7 +1547,11 @@ sp_drop_db_routines(THD *thd, char *db) if (nxtres != HA_ERR_END_OF_FILE) ret= SP_KEY_NOT_FOUND; if (deleted) + { sp_cache_invalidate(); + /* Make change permanent and avoid 'table is marked as crashed' errors */ + table->file->extra(HA_EXTRA_FLUSH); + } } table->file->ha_index_end(); From 7681c6aa787f9d3402059957bd8d993997cb623b Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 17 Oct 2013 14:11:19 +0200 Subject: [PATCH 31/41] MDEV-4506: Parallel replication: Intermediate commit. Fix some part of update of old-style coordinates in parallel replication: - Ignore XtraDB request for old-style coordinates, not meaningful for parallel replication (must use GTID to get crash-safe parallel slave). 
- Only update relay log coordinates forward, not backwards, to ensure that parallel threads do not conflict with each other. - Move future_event_relay_log_pos to rgi. --- sql/log_event.cc | 56 +++++++++++++++++++++++--------------------- sql/log_event_old.cc | 2 +- sql/rpl_parallel.cc | 2 ++ sql/rpl_parallel.h | 1 + sql/rpl_rli.cc | 32 ++++++++++++++++++------- sql/rpl_rli.h | 15 ++++++++---- sql/slave.cc | 3 ++- 7 files changed, 69 insertions(+), 42 deletions(-) diff --git a/sql/log_event.cc b/sql/log_event.cc index 55166b65df4..cd6da8baa22 100644 --- a/sql/log_event.cc +++ b/sql/log_event.cc @@ -3881,7 +3881,7 @@ int Query_log_event::do_apply_event(rpl_group_info *rgi, future-change-proof addon, e.g if COMMIT handling will start checking invariants like IN_STMT flag must be off at committing the transaction. */ - const_cast<Relay_log_info*>(rli)->inc_event_relay_log_pos(); + rgi->inc_event_relay_log_pos(); const_cast<Relay_log_info*>(rli)->clear_flag(Relay_log_info::IN_STMT); } else @@ -4249,7 +4249,6 @@ end: int Query_log_event::do_update_pos(rpl_group_info *rgi) { - Relay_log_info *rli= rgi->rli; /* Note that we will not increment group* positions if we are just after a SET ONE_SHOT, because SET ONE_SHOT should not be separated @@ -4257,7 +4256,7 @@ int Query_log_event::do_update_pos(rpl_group_info *rgi) */ if (thd->one_shot_set) { - rli->inc_event_relay_log_pos(); + rgi->inc_event_relay_log_pos(); return 0; } else @@ -4864,7 +4863,6 @@ int Format_description_log_event::do_apply_event(rpl_group_info *rgi) int Format_description_log_event::do_update_pos(rpl_group_info *rgi) { - Relay_log_info *rli= rgi->rli; if (server_id == (uint32) global_system_variables.server_id) { /* @@ -4880,7 +4878,7 @@ int Format_description_log_event::do_update_pos(rpl_group_info *rgi) Intvar_log_event instead of starting at a Table_map_log_event or the Intvar_log_event respectively.
*/ - rli->inc_event_relay_log_pos(); + rgi->inc_event_relay_log_pos(); return 0; } else @@ -5955,7 +5953,7 @@ int Rotate_log_event::do_update_pos(rpl_group_info *rgi) (ulong) rli->group_master_log_pos)); memcpy(rli->group_master_log_name, new_log_ident, ident_len+1); rli->notify_group_master_log_name_update(); - rli->inc_group_relay_log_pos(pos, TRUE /* skip_lock */); + rli->inc_group_relay_log_pos(pos, rgi, TRUE /* skip_lock */); DBUG_PRINT("info", ("new group_master_log_name: '%s' " "new group_master_log_pos: %lu", rli->group_master_log_name, @@ -5978,7 +5976,7 @@ int Rotate_log_event::do_update_pos(rpl_group_info *rgi) thd->variables.auto_increment_offset= 1; } else - rli->inc_event_relay_log_pos(); + rgi->inc_event_relay_log_pos(); DBUG_RETURN(0); @@ -6290,8 +6288,7 @@ Gtid_log_event::do_apply_event(rpl_group_info *rgi) int Gtid_log_event::do_update_pos(rpl_group_info *rgi) { - Relay_log_info *rli= rgi->rli; - rli->inc_event_relay_log_pos(); + rgi->inc_event_relay_log_pos(); return 0; } @@ -6723,8 +6720,7 @@ int Intvar_log_event::do_apply_event(rpl_group_info *rgi) int Intvar_log_event::do_update_pos(rpl_group_info *rgi) { - Relay_log_info *rli= rgi->rli; - rli->inc_event_relay_log_pos(); + rgi->inc_event_relay_log_pos(); return 0; } @@ -6820,8 +6816,7 @@ int Rand_log_event::do_apply_event(rpl_group_info *rgi) int Rand_log_event::do_update_pos(rpl_group_info *rgi) { - Relay_log_info *rli= rgi->rli; - rli->inc_event_relay_log_pos(); + rgi->inc_event_relay_log_pos(); return 0; } @@ -7485,8 +7480,7 @@ int User_var_log_event::do_apply_event(rpl_group_info *rgi) int User_var_log_event::do_update_pos(rpl_group_info *rgi) { - Relay_log_info *rli= rgi->rli; - rli->inc_event_relay_log_pos(); + rgi->inc_event_relay_log_pos(); return 0; } @@ -7717,11 +7711,11 @@ int Stop_log_event::do_update_pos(rpl_group_info *rgi) the target position when in fact we have not. 
*/ if (rli->get_flag(Relay_log_info::IN_TRANSACTION)) - rli->inc_event_relay_log_pos(); + rgi->inc_event_relay_log_pos(); else { rpl_global_gtid_slave_state.record_and_update_gtid(thd, rgi); - rli->inc_group_relay_log_pos(0); + rli->inc_group_relay_log_pos(0, rgi); flush_relay_log_info(rli); } DBUG_RETURN(0); @@ -9543,7 +9537,7 @@ Rows_log_event::do_update_pos(rpl_group_info *rgi) } else { - rli->inc_event_relay_log_pos(); + rgi->inc_event_relay_log_pos(); } DBUG_RETURN(error); @@ -9767,8 +9761,7 @@ int Annotate_rows_log_event::do_apply_event(rpl_group_info *rgi) #if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) int Annotate_rows_log_event::do_update_pos(rpl_group_info *rgi) { - Relay_log_info *rli= rgi->rli; - rli->inc_event_relay_log_pos(); + rgi->inc_event_relay_log_pos(); return 0; } #endif @@ -10395,8 +10388,7 @@ Table_map_log_event::do_shall_skip(rpl_group_info *rgi) int Table_map_log_event::do_update_pos(rpl_group_info *rgi) { - Relay_log_info *rli= rgi->rli; - rli->inc_event_relay_log_pos(); + rgi->inc_event_relay_log_pos(); return 0; } @@ -11930,11 +11922,21 @@ bool rpl_get_position_info(const char **log_file_name, ulonglong *log_pos, return FALSE; #else const Relay_log_info *rli= &(active_mi->rli); - *log_file_name= rli->group_master_log_name; - *log_pos= rli->group_master_log_pos + - (rli->future_event_relay_log_pos - rli->group_relay_log_pos); - *group_relay_log_name= rli->group_relay_log_name; - *relay_log_pos= rli->future_event_relay_log_pos; + if (opt_slave_parallel_threads == 0) + { + *log_file_name= rli->group_master_log_name; + *log_pos= rli->group_master_log_pos + + (rli->future_event_relay_log_pos - rli->group_relay_log_pos); + *group_relay_log_name= rli->group_relay_log_name; + *relay_log_pos= rli->future_event_relay_log_pos; + } + else + { + *log_file_name= ""; + *log_pos= 0; + *group_relay_log_name= ""; + *relay_log_pos= 0; + } return TRUE; #endif } diff --git a/sql/log_event_old.cc b/sql/log_event_old.cc index 174219a8e72..cc212d6051b 
100644 --- a/sql/log_event_old.cc +++ b/sql/log_event_old.cc @@ -1839,7 +1839,7 @@ Old_rows_log_event::do_update_pos(rpl_group_info *rgi) } else { - rli->inc_event_relay_log_pos(); + rgi->inc_event_relay_log_pos(); } DBUG_RETURN(error); diff --git a/sql/rpl_parallel.cc b/sql/rpl_parallel.cc index 19ae2a35339..fbf135c0bb6 100644 --- a/sql/rpl_parallel.cc +++ b/sql/rpl_parallel.cc @@ -64,6 +64,7 @@ rpt_handle_event(rpl_parallel_thread::queued_event *qev, /* ToDo: Access to thd, and what about rli, split out a parallel part? */ mysql_mutex_lock(&rli->data_lock); qev->ev->thd= thd; + rgi->future_event_relay_log_pos= qev->future_event_relay_log_pos; err= apply_event_and_update_pos(qev->ev, thd, rgi, rpt); thd->rgi_slave= NULL; @@ -659,6 +660,7 @@ rpl_parallel::do_event(rpl_group_info *serial_rgi, Log_event *ev) } qev->ev= ev; qev->next= NULL; + qev->future_event_relay_log_pos= rli->future_event_relay_log_pos; if (typ == GTID_EVENT) { diff --git a/sql/rpl_parallel.h b/sql/rpl_parallel.h index b9106392faf..7830470a929 100644 --- a/sql/rpl_parallel.h +++ b/sql/rpl_parallel.h @@ -23,6 +23,7 @@ struct rpl_parallel_thread { queued_event *next; Log_event *ev; rpl_group_info *rgi; + ulonglong future_event_relay_log_pos; } *event_queue, *last_in_queue; }; diff --git a/sql/rpl_rli.cc b/sql/rpl_rli.cc index 53481d2efaf..0ea6b1e5d13 100644 --- a/sql/rpl_rli.cc +++ b/sql/rpl_rli.cc @@ -869,17 +869,33 @@ improper_arguments: %d timed_out: %d", void Relay_log_info::inc_group_relay_log_pos(ulonglong log_pos, - bool skip_lock) + rpl_group_info *rgi, + bool skip_lock) { DBUG_ENTER("Relay_log_info::inc_group_relay_log_pos"); if (!skip_lock) mysql_mutex_lock(&data_lock); - inc_event_relay_log_pos(); - group_relay_log_pos= event_relay_log_pos; - strmake_buf(group_relay_log_name,event_relay_log_name); - - notify_group_relay_log_name_update(); + rgi->inc_event_relay_log_pos(); + if (opt_slave_parallel_threads > 0) + { + /* In case of parallel replication, do not update the position backwards. 
*/ + int cmp= strcmp(group_relay_log_name, event_relay_log_name); + if (cmp < 0) + { + group_relay_log_pos= event_relay_log_pos; + strmake_buf(group_relay_log_name, event_relay_log_name); + notify_group_relay_log_name_update(); + } else if (cmp == 0 && group_relay_log_pos < event_relay_log_pos) + group_relay_log_pos= event_relay_log_pos; + } + else + { + /* Non-parallel case. */ + group_relay_log_pos= event_relay_log_pos; + strmake_buf(group_relay_log_name, event_relay_log_name); + notify_group_relay_log_name_update(); + } /* If the slave does not support transactions and replicates a transaction, @@ -1226,10 +1242,10 @@ void Relay_log_info::stmt_done(my_off_t event_master_log_pos, */ if ((rgi->thd->variables.option_bits & OPTION_BEGIN) && opt_using_transactions) - inc_event_relay_log_pos(); + rgi->inc_event_relay_log_pos(); else { - inc_group_relay_log_pos(event_master_log_pos); + inc_group_relay_log_pos(event_master_log_pos, rgi); if (rpl_global_gtid_slave_state.record_and_update_gtid(thd, rgi)) { report(WARNING_LEVEL, ER_CANNOT_UPDATE_GTID_STATE, diff --git a/sql/rpl_rli.h b/sql/rpl_rli.h index 68cd051be2a..92f65a1397d 100644 --- a/sql/rpl_rli.h +++ b/sql/rpl_rli.h @@ -351,13 +351,9 @@ public: if (until_condition==UNTIL_MASTER_POS) until_log_names_cmp_result= UNTIL_LOG_NAMES_CMP_UNKNOWN; } - - inline void inc_event_relay_log_pos() - { - event_relay_log_pos= future_event_relay_log_pos; - } void inc_group_relay_log_pos(ulonglong log_pos, + rpl_group_info *rgi, bool skip_lock=0); int wait_for_pos(THD* thd, String* log_name, longlong log_pos, @@ -561,6 +557,8 @@ struct rpl_group_info */ time_t last_event_start_time; + ulonglong future_event_relay_log_pos; + private: /* Runtime state for printing a note when slave is taking @@ -684,6 +682,13 @@ public: { return long_find_row_note_printed; } + + inline void inc_event_relay_log_pos() + { + if (opt_slave_parallel_threads == 0 || + rli->event_relay_log_pos < future_event_relay_log_pos) + rli->event_relay_log_pos= 
future_event_relay_log_pos; + } }; diff --git a/sql/slave.cc b/sql/slave.cc index 61c63cd2862..50960991faf 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -3331,7 +3331,7 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli, rli->abort_slave= 1; mysql_mutex_unlock(&rli->data_lock); delete ev; - rli->inc_event_relay_log_pos(); + serial_rgi->inc_event_relay_log_pos(); DBUG_RETURN(0); };); } @@ -3360,6 +3360,7 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli, DBUG_RETURN(1); } + serial_rgi->future_event_relay_log_pos= rli->future_event_relay_log_pos; exec_res= apply_event_and_update_pos(ev, thd, serial_rgi, NULL); delete_or_keep_event_post_apply(serial_rgi, typ, ev); From a09d2b105f8e56e8fec98975ea9fa091c263327a Mon Sep 17 00:00:00 2001 From: unknown Date: Wed, 23 Oct 2013 15:03:03 +0200 Subject: [PATCH 32/41] MDEV-4506: Parallel replication. Fix some more parts of old-style position updates. Now we save in rgi some coordinates for master log and relay log, so that in do_update_pos() we can use the right set of coordinates with the right events. The Rotate_log_event::do_update_pos() is fixed in the parallel case to not directly update relay-log.info (as Rotate event runs directly in the driver SQL thread, ahead of actual event execution). Instead, group_master_log_file is updated as part of do_update_pos() in each event execution. In the parallel case, position updates happen in parallel without any ordering, but taking care that position is not updated backwards. Since position update happens only after event execution this leads to the right result. Also fix an access-after-free introduced in an earlier commit. 
--- sql/log_event.cc | 32 +++++++------------------------- sql/rpl_parallel.cc | 16 ++++++++++++++++ sql/rpl_parallel.h | 3 +++ sql/rpl_rli.cc | 25 ++++++++++++++++++------- sql/rpl_rli.h | 26 +++++++++++++------------- sql/slave.cc | 2 ++ sql/sys_vars.cc | 2 ++ 7 files changed, 61 insertions(+), 45 deletions(-) diff --git a/sql/log_event.cc b/sql/log_event.cc index cd6da8baa22..e7c0506a50a 100644 --- a/sql/log_event.cc +++ b/sql/log_event.cc @@ -3843,17 +3843,6 @@ int Query_log_event::do_apply_event(rpl_group_info *rgi, thd->variables.auto_increment_increment= auto_increment_increment; thd->variables.auto_increment_offset= auto_increment_offset; - /* - InnoDB internally stores the master log position it has executed so far, - i.e. the position just after the COMMIT event. - When InnoDB will want to store, the positions in rli won't have - been updated yet, so group_master_log_* will point to old BEGIN - and event_master_log* will point to the beginning of current COMMIT. - But log_pos of the COMMIT Query event is what we want, i.e. the pos of the - END of the current log event (COMMIT). We save it in rli so that InnoDB can - access it. - */ - const_cast(rli)->future_group_master_log_pos= log_pos; DBUG_PRINT("info", ("log_pos: %lu", (ulong) log_pos)); clear_all_errors(thd, const_cast(rli)); @@ -3882,7 +3871,6 @@ int Query_log_event::do_apply_event(rpl_group_info *rgi, invariants like IN_STMT flag must be off at committing the transaction. */ rgi->inc_event_relay_log_pos(); - const_cast(rli)->clear_flag(Relay_log_info::IN_STMT); } else { @@ -5535,16 +5523,6 @@ int Load_log_event::do_apply_event(NET* net, rpl_group_info *rgi, thd->lex->local_file= local_fname; mysql_reset_thd_for_next_command(thd, 0); - if (!use_rli_only_for_errors) - { - /* - Saved for InnoDB, see comment in - Query_log_event::do_apply_event() - */ - const_cast(rli)->future_group_master_log_pos= log_pos; - DBUG_PRINT("info", ("log_pos: %lu", (ulong) log_pos)); - } - /* We test replicate_*_db rules. 
Note that we have already prepared the file to load, even if we are going to ignore and delete it @@ -5940,11 +5918,16 @@ int Rotate_log_event::do_update_pos(rpl_group_info *rgi) correspond to the beginning of the transaction. Starting from 5.0.0, there also are some rotates from the slave itself, in the relay log, which shall not change the group positions. + + In parallel replication, rotate event is executed out-of-band with normal + events, so we cannot update group_master_log_name or _pos here, it will + be updated with the next normal event instead. */ if ((server_id != global_system_variables.server_id || rli->replicate_same_server_id) && !is_relay_log_event() && - !rli->is_in_group()) + !rli->is_in_group() && + !rgi->is_parallel_exec) { mysql_mutex_lock(&rli->data_lock); DBUG_PRINT("info", ("old group_master_log_name: '%s' " @@ -7712,7 +7695,7 @@ int Stop_log_event::do_update_pos(rpl_group_info *rgi) */ if (rli->get_flag(Relay_log_info::IN_TRANSACTION)) rgi->inc_event_relay_log_pos(); - else + else if (!rgi->is_parallel_exec) { rpl_global_gtid_slave_state.record_and_update_gtid(thd, rgi); rli->inc_group_relay_log_pos(0, rgi); @@ -8408,7 +8391,6 @@ int Execute_load_log_event::do_apply_event(rpl_group_info *rgi) calls mysql_load()). */ - const_cast(rli)->future_group_master_log_pos= log_pos; if (lev->do_apply_event(0,rgi,1)) { /* diff --git a/sql/rpl_parallel.cc b/sql/rpl_parallel.cc index fbf135c0bb6..8942b1d0a33 100644 --- a/sql/rpl_parallel.cc +++ b/sql/rpl_parallel.cc @@ -64,7 +64,11 @@ rpt_handle_event(rpl_parallel_thread::queued_event *qev, /* ToDo: Access to thd, and what about rli, split out a parallel part? 
*/ mysql_mutex_lock(&rli->data_lock); qev->ev->thd= thd; + strcpy(rgi->event_relay_log_name_buf, qev->event_relay_log_name); + rgi->event_relay_log_name= rgi->event_relay_log_name_buf; + rgi->event_relay_log_pos= qev->event_relay_log_pos; rgi->future_event_relay_log_pos= qev->future_event_relay_log_pos; + strcpy(rgi->future_event_master_log_name, qev->future_event_master_log_name); err= apply_event_and_update_pos(qev->ev, thd, rgi, rpt); thd->rgi_slave= NULL; @@ -660,7 +664,10 @@ rpl_parallel::do_event(rpl_group_info *serial_rgi, Log_event *ev) } qev->ev= ev; qev->next= NULL; + strcpy(qev->event_relay_log_name, rli->event_relay_log_name); + qev->event_relay_log_pos= rli->event_relay_log_pos; qev->future_event_relay_log_pos= rli->future_event_relay_log_pos; + strcpy(qev->future_event_master_log_name, rli->future_event_master_log_name); if (typ == GTID_EVENT) { @@ -674,6 +681,7 @@ rpl_parallel::do_event(rpl_group_info *serial_rgi, Log_event *ev) delete rgi; return true; } + rgi->is_parallel_exec = true; if ((rgi->deferred_events_collecting= rli->mi->rpl_filter->is_on())) rgi->deferred_events= new Deferred_log_events(rli); @@ -783,6 +791,14 @@ rpl_parallel::do_event(rpl_group_info *serial_rgi, Log_event *ev) have GTID, like a MariaDB 5.5 or MySQL master. */ qev->rgi= serial_rgi; + /* Handle master log name change, seen in Rotate_log_event. 
*/ + if (typ == ROTATE_EVENT) + { + Rotate_log_event *rev= static_cast(qev->ev); + memcpy(rli->future_event_master_log_name, + rev->new_log_ident, rev->ident_len+1); + } + rpt_handle_event(qev, NULL); delete_or_keep_event_post_apply(serial_rgi, typ, qev->ev); my_free(qev); diff --git a/sql/rpl_parallel.h b/sql/rpl_parallel.h index 7830470a929..7057ec66de2 100644 --- a/sql/rpl_parallel.h +++ b/sql/rpl_parallel.h @@ -24,6 +24,9 @@ struct rpl_parallel_thread { Log_event *ev; rpl_group_info *rgi; ulonglong future_event_relay_log_pos; + char event_relay_log_name[FN_REFLEN]; + char future_event_master_log_name[FN_REFLEN]; + ulonglong event_relay_log_pos; } *event_queue, *last_in_queue; }; diff --git a/sql/rpl_rli.cc b/sql/rpl_rli.cc index 0ea6b1e5d13..f3a6863d217 100644 --- a/sql/rpl_rli.cc +++ b/sql/rpl_rli.cc @@ -877,7 +877,9 @@ void Relay_log_info::inc_group_relay_log_pos(ulonglong log_pos, if (!skip_lock) mysql_mutex_lock(&data_lock); rgi->inc_event_relay_log_pos(); - if (opt_slave_parallel_threads > 0) + DBUG_PRINT("info", ("log_pos: %lu group_master_log_pos: %lu", + (long) log_pos, (long) group_master_log_pos)); + if (rgi->is_parallel_exec) { /* In case of parallel replication, do not update the position backwards. 
*/ int cmp= strcmp(group_relay_log_name, event_relay_log_name); @@ -888,6 +890,18 @@ void Relay_log_info::inc_group_relay_log_pos(ulonglong log_pos, notify_group_relay_log_name_update(); } else if (cmp == 0 && group_relay_log_pos < event_relay_log_pos) group_relay_log_pos= event_relay_log_pos; + + cmp= strcmp(group_master_log_name, rgi->future_event_master_log_name); + if (cmp <= 0) + { + if (cmp < 0) + { + strcpy(group_master_log_name, rgi->future_event_master_log_name); + notify_group_master_log_name_update(); + } + if (group_master_log_pos < log_pos) + group_master_log_pos= log_pos; + } } else { @@ -895,6 +909,8 @@ void Relay_log_info::inc_group_relay_log_pos(ulonglong log_pos, group_relay_log_pos= event_relay_log_pos; strmake_buf(group_relay_log_name, event_relay_log_name); notify_group_relay_log_name_update(); + if (log_pos) // 3.23 binlogs don't have log_posx + group_master_log_pos= log_pos; } /* @@ -927,12 +943,6 @@ void Relay_log_info::inc_group_relay_log_pos(ulonglong log_pos, the relay log is not "val". With the end_log_pos solution, we avoid computations involving lengthes. 
*/ - DBUG_PRINT("info", ("log_pos: %lu group_master_log_pos: %lu", - (long) log_pos, (long) group_master_log_pos)); - if (log_pos) // 3.23 binlogs don't have log_posx - { - group_master_log_pos= log_pos; - } mysql_cond_broadcast(&data_cond); if (!skip_lock) mysql_mutex_unlock(&data_lock); @@ -1436,6 +1446,7 @@ rpl_group_info::rpl_group_info(Relay_log_info *rli_) wait_commit_group_info(0), wait_start_sub_id(0), parallel_entry(0), deferred_events(NULL), m_annotate_event(0), tables_to_lock(0), tables_to_lock_count(0), trans_retries(0), last_event_start_time(0), + is_parallel_exec(false), row_stmt_start_timestamp(0), long_find_row_note_printed(false) { bzero(¤t_gtid, sizeof(current_gtid)); diff --git a/sql/rpl_rli.h b/sql/rpl_rli.h index 92f65a1397d..38268ee85c5 100644 --- a/sql/rpl_rli.h +++ b/sql/rpl_rli.h @@ -185,6 +185,10 @@ public: char event_relay_log_name[FN_REFLEN]; ulonglong event_relay_log_pos; ulonglong future_event_relay_log_pos; + /* + The master log name for current event. Only used in parallel replication. + */ + char future_event_master_log_name[FN_REFLEN]; #ifdef HAVE_valgrind bool is_fake; /* Mark that this is a fake relay log info structure */ @@ -216,18 +220,6 @@ public: */ bool sql_force_rotate_relay; - /* - When it commits, InnoDB internally stores the master log position it has - processed so far; the position to store is the one of the end of the - committing event (the COMMIT query event, or the event if in autocommit - mode). - */ -#if MYSQL_VERSION_ID < 40100 - ulonglong future_master_log_pos; -#else - ulonglong future_group_master_log_pos; -#endif - time_t last_master_timestamp; void clear_until_condition(); @@ -557,7 +549,15 @@ struct rpl_group_info */ time_t last_event_start_time; + char *event_relay_log_name; + char event_relay_log_name_buf[FN_REFLEN]; + ulonglong event_relay_log_pos; ulonglong future_event_relay_log_pos; + /* + The master log name for current event. Only used in parallel replication. 
+ */ + char future_event_master_log_name[FN_REFLEN]; + bool is_parallel_exec; private: /* @@ -685,7 +685,7 @@ public: inline void inc_event_relay_log_pos() { - if (opt_slave_parallel_threads == 0 || + if (!is_parallel_exec || rli->event_relay_log_pos < future_event_relay_log_pos) rli->event_relay_log_pos= future_event_relay_log_pos; } diff --git a/sql/slave.cc b/sql/slave.cc index 50960991faf..e73e3e14c10 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -3361,6 +3361,8 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli, } serial_rgi->future_event_relay_log_pos= rli->future_event_relay_log_pos; + serial_rgi->event_relay_log_name= rli->event_relay_log_name; + serial_rgi->event_relay_log_pos= rli->event_relay_log_pos; exec_res= apply_event_and_update_pos(ev, thd, serial_rgi, NULL); delete_or_keep_event_post_apply(serial_rgi, typ, ev); diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc index 91f13bebd12..d509a614b6e 100644 --- a/sql/sys_vars.cc +++ b/sql/sys_vars.cc @@ -4228,6 +4228,8 @@ static bool check_pseudo_slave_mode(sys_var *self, THD *thd, set_var *var) #ifndef EMBEDDED_LIBRARY delete thd->rli_fake; thd->rli_fake= NULL; + delete thd->rgi_fake; + thd->rgi_fake= NULL; #endif } else if (previous_val && val) From 96a4f1f62862883de4bf14268cfe28d5bf187f49 Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 24 Oct 2013 08:53:48 +0200 Subject: [PATCH 33/41] MDEV-4506: Parallel replication: Update some comments. --- sql/rpl_parallel.cc | 18 ++++-------------- sql/rpl_rli.cc | 4 ++++ 2 files changed, 8 insertions(+), 14 deletions(-) diff --git a/sql/rpl_parallel.cc b/sql/rpl_parallel.cc index 8942b1d0a33..842dcefa0a6 100644 --- a/sql/rpl_parallel.cc +++ b/sql/rpl_parallel.cc @@ -14,20 +14,14 @@ following transactions, so slave binlog position will be correct. And all the retry logic for temporary errors like deadlock. - - Stopping the slave needs to handle stopping all parallel executions. 
And - the logic in sql_slave_killed() that waits for current event group to - complete needs to be extended appropriately... - - - Audit the use of Relay_log_info::data_lock. Make sure it is held - correctly in all needed places also when using parallel replication. - - We need some user-configurable limit on how far ahead the SQL thread will fetch and queue events for parallel execution (otherwise if slave gets behind we will fill up memory with pending malloc()'ed events). - - Fix update of relay-log.info and master.info. In non-GTID replication, - they must be serialised to preserve correctness. In GTID replication, we - should not update them at all except at slave thread stop. + - In GTID replication, we should not need to update master.info and + relay-log.info on disk at all except at slave thread stop. They are not + used to know where to restart, the updates are not crash-safe, and it + could negatively affect performance. - All the waits (eg. in struct wait_for_commit and in rpl_parallel_thread_pool::get_thread()) need to be killable. And on kill, @@ -39,10 +33,6 @@ slave rolls back the transaction; parallel execution needs to be able to deal with this wrt. commit_orderer and such. - - We should notice if the master doesn't support GTID, and then run in - single threaded mode against that master. This is needed to be able to - support multi-master-replication with old and new masters. - - Retry of failed transactions is not yet implemented for the parallel case. */ diff --git a/sql/rpl_rli.cc b/sql/rpl_rli.cc index f3a6863d217..d8a604cfe32 100644 --- a/sql/rpl_rli.cc +++ b/sql/rpl_rli.cc @@ -891,6 +891,10 @@ void Relay_log_info::inc_group_relay_log_pos(ulonglong log_pos, } else if (cmp == 0 && group_relay_log_pos < event_relay_log_pos) group_relay_log_pos= event_relay_log_pos; + /* + In the parallel case we need to update the master_log_name here, rather + than in Rotate_log_event::do_update_pos(). 
+ */ cmp= strcmp(group_master_log_name, rgi->future_event_master_log_name); if (cmp <= 0) { From ee8a8162086b29022c304b270369439a3aaaf8a5 Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 24 Oct 2013 12:44:21 +0200 Subject: [PATCH 34/41] MDEV-4506: Parallel replication. Implement --slave-parallel-max-queued to limit memory usage of SQL thread read-ahead in the relay log. --- mysql-test/r/mysqld--help.result | 6 ++ .../r/slave_parallel_max_queued_basic.result | 13 +++++ .../t/slave_parallel_max_queued_basic.test | 14 +++++ sql/mysqld.cc | 1 + sql/mysqld.h | 1 + sql/rpl_parallel.cc | 56 +++++++++++-------- sql/rpl_parallel.h | 25 ++++++++- sql/slave.cc | 14 +++-- sql/sys_vars.cc | 10 ++++ 9 files changed, 113 insertions(+), 27 deletions(-) create mode 100644 mysql-test/suite/sys_vars/r/slave_parallel_max_queued_basic.result create mode 100644 mysql-test/suite/sys_vars/t/slave_parallel_max_queued_basic.test diff --git a/mysql-test/r/mysqld--help.result b/mysql-test/r/mysqld--help.result index f6f03b42270..82911bde00f 100644 --- a/mysql-test/r/mysqld--help.result +++ b/mysql-test/r/mysqld--help.result @@ -794,6 +794,11 @@ The following options may be given as the first argument: --slave-net-timeout=# Number of seconds to wait for more data from any master/slave connection before aborting the read + --slave-parallel-max-queued=# + Limit on how much memory SQL threads should use per + parallel replication thread when reading ahead in the + relay log looking for opportunities for parallel + replication. Only used when --slave-parallel-threads > 0.
--slave-parallel-threads=# If non-zero, number of threads to spawn to apply in parallel events on the slave that were group-committed on @@ -1148,6 +1153,7 @@ slave-compressed-protocol FALSE slave-exec-mode STRICT slave-max-allowed-packet 1073741824 slave-net-timeout 3600 +slave-parallel-max-queued 131072 slave-parallel-threads 0 slave-skip-errors (No default value) slave-sql-verify-checksum TRUE diff --git a/mysql-test/suite/sys_vars/r/slave_parallel_max_queued_basic.result b/mysql-test/suite/sys_vars/r/slave_parallel_max_queued_basic.result new file mode 100644 index 00000000000..568ecac6de6 --- /dev/null +++ b/mysql-test/suite/sys_vars/r/slave_parallel_max_queued_basic.result @@ -0,0 +1,13 @@ +SET @save_slave_parallel_max_queued= @@GLOBAL.slave_parallel_max_queued; +SELECT @@GLOBAL.slave_parallel_max_queued as 'Check default'; +Check default +131072 +SELECT @@SESSION.slave_parallel_max_queued as 'no session var'; +ERROR HY000: Variable 'slave_parallel_max_queued' is a GLOBAL variable +SET GLOBAL slave_parallel_max_queued= 0; +SET GLOBAL slave_parallel_max_queued= DEFAULT; +SET GLOBAL slave_parallel_max_queued= 65536; +SELECT @@GLOBAL.slave_parallel_max_queued; +@@GLOBAL.slave_parallel_max_queued +65536 +SET GLOBAL slave_parallel_max_queued = @save_slave_parallel_max_queued; diff --git a/mysql-test/suite/sys_vars/t/slave_parallel_max_queued_basic.test b/mysql-test/suite/sys_vars/t/slave_parallel_max_queued_basic.test new file mode 100644 index 00000000000..e3d3a9365f1 --- /dev/null +++ b/mysql-test/suite/sys_vars/t/slave_parallel_max_queued_basic.test @@ -0,0 +1,14 @@ +--source include/not_embedded.inc + +SET @save_slave_parallel_max_queued= @@GLOBAL.slave_parallel_max_queued; + +SELECT @@GLOBAL.slave_parallel_max_queued as 'Check default'; +--error ER_INCORRECT_GLOBAL_LOCAL_VAR +SELECT @@SESSION.slave_parallel_max_queued as 'no session var'; + +SET GLOBAL slave_parallel_max_queued= 0; +SET GLOBAL slave_parallel_max_queued= DEFAULT; +SET GLOBAL 
slave_parallel_max_queued= 65536; +SELECT @@GLOBAL.slave_parallel_max_queued; + +SET GLOBAL slave_parallel_max_queued = @save_slave_parallel_max_queued; diff --git a/sql/mysqld.cc b/sql/mysqld.cc index 1e7deef8d89..12ee904cc1b 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -548,6 +548,7 @@ ulong stored_program_cache_size= 0; ulong opt_slave_parallel_threads= 0; ulong opt_binlog_commit_wait_count= 0; ulong opt_binlog_commit_wait_usec= 0; +ulong opt_slave_parallel_max_queued= 131072; const double log_10[] = { 1e000, 1e001, 1e002, 1e003, 1e004, 1e005, 1e006, 1e007, 1e008, 1e009, diff --git a/sql/mysqld.h b/sql/mysqld.h index e45b48f0332..cbcff93e423 100644 --- a/sql/mysqld.h +++ b/sql/mysqld.h @@ -177,6 +177,7 @@ extern ulong opt_binlog_rows_event_max_size; extern ulong rpl_recovery_rank, thread_cache_size; extern ulong stored_program_cache_size; extern ulong opt_slave_parallel_threads; +extern ulong opt_slave_parallel_max_queued; extern ulong opt_binlog_commit_wait_count; extern ulong opt_binlog_commit_wait_usec; extern ulong back_log; diff --git a/sql/rpl_parallel.cc b/sql/rpl_parallel.cc index 842dcefa0a6..b8d75c7bc82 100644 --- a/sql/rpl_parallel.cc +++ b/sql/rpl_parallel.cc @@ -14,10 +14,6 @@ following transactions, so slave binlog position will be correct. And all the retry logic for temporary errors like deadlock. - - We need some user-configurable limit on how far ahead the SQL thread will - fetch and queue events for parallel execution (otherwise if slave gets - behind we will fill up memory with pending malloc()'ed events). - - In GTID replication, we should not need to update master.info and relay-log.info on disk at all except at slave thread stop. They are not used to know where to restart, the updates are not crash-safe, and it @@ -32,6 +28,7 @@ crashes in the middle of writing the event group to the binlog. The slave rolls back the transaction; parallel execution needs to be able to deal with this wrt. commit_orderer and such. 
+ See Format_description_log_event::do_apply_event(). - Retry of failed transactions is not yet implemented for the parallel case. */ @@ -147,8 +144,9 @@ handle_rpl_parallel_thread(void *arg) "Waiting for work from SQL thread"); while (!(events= rpt->event_queue) && !rpt->stop && !thd->killed) mysql_cond_wait(&rpt->COND_rpl_thread, &rpt->LOCK_rpl_thread); - rpt->event_queue= rpt->last_in_queue= NULL; + rpt->dequeue(events); thd->exit_cond(old_msg); + mysql_cond_signal(&rpt->COND_rpl_thread); more_events: while (events) @@ -286,7 +284,7 @@ handle_rpl_parallel_thread(void *arg) This is faster than having to wakeup the pool manager thread to give us a new event. */ - rpt->event_queue= rpt->last_in_queue= NULL; + rpt->dequeue(events); mysql_mutex_unlock(&rpt->LOCK_rpl_thread); goto more_events; } @@ -619,7 +617,8 @@ rpl_parallel::wait_for_done() */ bool -rpl_parallel::do_event(rpl_group_info *serial_rgi, Log_event *ev) +rpl_parallel::do_event(rpl_group_info *serial_rgi, Log_event *ev, + ulonglong event_size) { rpl_parallel_entry *e; rpl_parallel_thread *cur_thread; @@ -653,6 +652,7 @@ rpl_parallel::do_event(rpl_group_info *serial_rgi, Log_event *ev) return true; } qev->ev= ev; + qev->event_size= event_size; qev->next= NULL; strcpy(qev->event_relay_log_name, rli->event_relay_log_name); qev->event_relay_log_pos= rli->event_relay_log_pos; @@ -715,17 +715,33 @@ rpl_parallel::do_event(rpl_group_info *serial_rgi, Log_event *ev) if (cur_thread) { mysql_mutex_lock(&cur_thread->LOCK_rpl_thread); - if (cur_thread->current_entry != e) + for (;;) { - /* - The worker thread became idle, and returned to the free list and - possibly was allocated to a different request. This also means - that everything previously queued has already been executed, else - the worker thread would not have become idle. So we should - allocate a new worker thread. 
- */ - mysql_mutex_unlock(&cur_thread->LOCK_rpl_thread); - e->rpl_thread= cur_thread= NULL; + if (cur_thread->current_entry != e) + { + /* + The worker thread became idle, and returned to the free list and + possibly was allocated to a different request. This also means + that everything previously queued has already been executed, + else the worker thread would not have become idle. So we should + allocate a new worker thread. + */ + mysql_mutex_unlock(&cur_thread->LOCK_rpl_thread); + e->rpl_thread= cur_thread= NULL; + break; + } + else if (cur_thread->queued_size <= opt_slave_parallel_max_queued) + break; // The thread is ready to queue into + else + { + /* + We have reached the limit of how much memory we are allowed to + use for queuing events, so wait for the thread to consume some + of its queue. + */ + mysql_cond_wait(&cur_thread->COND_rpl_thread, + &cur_thread->LOCK_rpl_thread); + } } } @@ -819,11 +835,7 @@ rpl_parallel::do_event(rpl_group_info *serial_rgi, Log_event *ev) /* Queue the event for processing. 
*/ - if (cur_thread->last_in_queue) - cur_thread->last_in_queue->next= qev; - else - cur_thread->event_queue= qev; - cur_thread->last_in_queue= qev; + cur_thread->enqueue(qev); mysql_mutex_unlock(&cur_thread->LOCK_rpl_thread); mysql_cond_signal(&cur_thread->COND_rpl_thread); diff --git a/sql/rpl_parallel.h b/sql/rpl_parallel.h index 7057ec66de2..fe9c6708e97 100644 --- a/sql/rpl_parallel.h +++ b/sql/rpl_parallel.h @@ -27,7 +27,29 @@ struct rpl_parallel_thread { char event_relay_log_name[FN_REFLEN]; char future_event_master_log_name[FN_REFLEN]; ulonglong event_relay_log_pos; + size_t event_size; } *event_queue, *last_in_queue; + uint64 queued_size; + + void enqueue(queued_event *qev) + { + if (last_in_queue) + last_in_queue->next= qev; + else + event_queue= qev; + last_in_queue= qev; + queued_size+= qev->event_size; + } + + void dequeue(queued_event *list) + { + queued_event *tmp; + + DBUG_ASSERT(list == event_queue); + event_queue= last_in_queue= NULL; + for (tmp= list; tmp; tmp= tmp->next) + queued_size-= tmp->event_size; + } }; @@ -87,7 +109,8 @@ struct rpl_parallel { void reset(); rpl_parallel_entry *find(uint32 domain_id); void wait_for_done(); - bool do_event(rpl_group_info *serial_rgi, Log_event *ev); + bool do_event(rpl_group_info *serial_rgi, Log_event *ev, + ulonglong event_size); }; diff --git a/sql/slave.cc b/sql/slave.cc index e73e3e14c10..b2bd8b9423e 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -156,7 +156,7 @@ static int safe_reconnect(THD* thd, MYSQL* mysql, Master_info* mi, bool suppress_warnings); static int connect_to_master(THD* thd, MYSQL* mysql, Master_info* mi, bool reconnect, bool suppress_warnings); -static Log_event* next_event(rpl_group_info* rgi); +static Log_event* next_event(rpl_group_info* rgi, ulonglong *event_size); static int queue_event(Master_info* mi,const char* buf,ulong event_len); static int terminate_slave_thread(THD *thd, mysql_mutex_t *term_lock, @@ -3273,6 +3273,7 @@ inline void update_state_of_relay_log(Relay_log_info 
*rli, Log_event *ev) static int exec_relay_log_event(THD* thd, Relay_log_info* rli, rpl_group_info *serial_rgi) { + ulonglong event_size; DBUG_ENTER("exec_relay_log_event"); /* @@ -3282,7 +3283,7 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli, */ mysql_mutex_lock(&rli->data_lock); - Log_event * ev = next_event(serial_rgi); + Log_event *ev= next_event(serial_rgi, &event_size); if (sql_slave_killed(serial_rgi)) { @@ -3344,7 +3345,7 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli, */ if (opt_slave_parallel_threads > 0 && rli->slave_skip_counter == 0) - DBUG_RETURN(rli->parallel.do_event(serial_rgi, ev)); + DBUG_RETURN(rli->parallel.do_event(serial_rgi, ev, event_size)); /* For GTID, allocate a new sub_id for the given domain_id. @@ -5836,8 +5837,10 @@ static IO_CACHE *reopen_relay_log(Relay_log_info *rli, const char **errmsg) @return The event read, or NULL on error. If an error occurs, the error is reported through the sql_print_information() or sql_print_error() functions. + + The size of the read event (in bytes) is returned in *event_size. 
*/ -static Log_event* next_event(rpl_group_info *rgi) +static Log_event* next_event(rpl_group_info *rgi, ulonglong *event_size) { Log_event* ev; Relay_log_info *rli= rgi->rli; @@ -5848,6 +5851,7 @@ static Log_event* next_event(rpl_group_info *rgi) DBUG_ENTER("next_event"); DBUG_ASSERT(thd != 0 && thd == rli->sql_driver_thd); + *event_size= 0; #ifndef DBUG_OFF if (abort_slave_event_count && !rli->events_till_abort--) @@ -5932,11 +5936,13 @@ static Log_event* next_event(rpl_group_info *rgi) opt_slave_sql_verify_checksum))) { + ulonglong old_pos= rli->future_event_relay_log_pos; /* read it while we have a lock, to avoid a mutex lock in inc_event_relay_log_pos() */ rli->future_event_relay_log_pos= my_b_tell(cur_log); + *event_size= rli->future_event_relay_log_pos - old_pos; if (hot_log) mysql_mutex_unlock(log_lock); diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc index d509a614b6e..1e6c9c69667 100644 --- a/sql/sys_vars.cc +++ b/sql/sys_vars.cc @@ -1479,6 +1479,16 @@ static Sys_var_ulong Sys_slave_parallel_threads( VALID_RANGE(0,16383), DEFAULT(0), BLOCK_SIZE(1), NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(check_slave_parallel_threads), ON_UPDATE(fix_slave_parallel_threads)); + + +static Sys_var_ulong Sys_slave_parallel_max_queued( + "slave_parallel_max_queued", + "Limit on how much memory SQL threads should use per parallel " + "replication thread when reading ahead in the relay log looking for " + "opportunities for parallel replication. Only used when " + "--slave-parallel-threads > 0.", + GLOBAL_VAR(opt_slave_parallel_max_queued), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0,2147483647), DEFAULT(131072), BLOCK_SIZE(1)); #endif From 7a22b6a6550ef6f798a0f5ff21d645ae43b3cbb2 Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 24 Oct 2013 14:37:45 +0200 Subject: [PATCH 35/41] MDEV-4506: Parallel replication. Fix uninitialised variable. 
--- sql/slave.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/sql/slave.cc b/sql/slave.cc index b2bd8b9423e..113462b5aa0 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -4200,6 +4200,7 @@ pthread_handler_t handle_slave_sql(void *arg) "Error initializing relay log position: %s", errmsg); goto err; } + strcpy(rli->future_event_master_log_name, rli->group_master_log_name); THD_CHECK_SENTRY(thd); #ifndef DBUG_OFF { From 80d0dd7babb5ade8345cdd7065e8f9ef6b65e3da Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 25 Oct 2013 12:56:12 +0200 Subject: [PATCH 36/41] MDEV-4506: Parallel replication. Do not update relay-log.info and master.info on disk after every event when using GTID mode: - relay-log.info and master.info are not crash-safe, and are not used when slave restarts in GTID mode (slave connects with GTID position instead and immediately rewrites the file with the new, correct information found). - When using GTID and parallel replication, the position in relay-log.info is misleading at best and simply wrong at worst. - When using parallel replication, the fact that every single transaction needs to do a write() syscall to the same file is likely to become a serious bottleneck. The files are still written at normal slave stop. In non-GTID mode, the files are written as normal (this is needed to be able to restart after slave crash, even if such restart is then not crash-safe, no change). --- sql/rpl_parallel.cc | 7 +------ sql/rpl_rli.cc | 9 ++++++--- sql/slave.cc | 7 ++++++- 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/sql/rpl_parallel.cc b/sql/rpl_parallel.cc index b8d75c7bc82..e1d8b3a2f0c 100644 --- a/sql/rpl_parallel.cc +++ b/sql/rpl_parallel.cc @@ -14,10 +14,7 @@ following transactions, so slave binlog position will be correct. And all the retry logic for temporary errors like deadlock. - - In GTID replication, we should not need to update master.info and - relay-log.info on disk at all except at slave thread stop. 
They are not - used to know where to restart, the updates are not crash-safe, and it - could negatively affect performance. + - Retry of failed transactions is not yet implemented for the parallel case. - All the waits (eg. in struct wait_for_commit and in rpl_parallel_thread_pool::get_thread()) need to be killable. And on kill, @@ -29,8 +26,6 @@ slave rolls back the transaction; parallel execution needs to be able to deal with this wrt. commit_orderer and such. See Format_description_log_event::do_apply_event(). - - - Retry of failed transactions is not yet implemented for the parallel case. */ struct rpl_parallel_thread_pool global_rpl_thread_pool; diff --git a/sql/rpl_rli.cc b/sql/rpl_rli.cc index d8a604cfe32..ebbe5f4407c 100644 --- a/sql/rpl_rli.cc +++ b/sql/rpl_rli.cc @@ -1274,9 +1274,12 @@ void Relay_log_info::stmt_done(my_off_t event_master_log_pos, DBA aware of the problem in the error log. */ } - DBUG_EXECUTE_IF("inject_crash_before_flush_rli", DBUG_SUICIDE();); - flush_relay_log_info(this); - DBUG_EXECUTE_IF("inject_crash_after_flush_rli", DBUG_SUICIDE();); + if (mi->using_gtid == Master_info::USE_GTID_NO) + { + DBUG_EXECUTE_IF("inject_crash_before_flush_rli", DBUG_SUICIDE();); + flush_relay_log_info(this); + DBUG_EXECUTE_IF("inject_crash_after_flush_rli", DBUG_SUICIDE();); + } /* Note that Rotate_log_event::do_apply_event() does not call this function, so there is no chance that a fake rotate event resets diff --git a/sql/slave.cc b/sql/slave.cc index 113462b5aa0..fcc92f42536 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -3901,7 +3901,8 @@ Stopping slave I/O thread due to out-of-memory error from master"); goto err; } - if (flush_master_info(mi, TRUE, TRUE)) + if (mi->using_gtid != Master_info::USE_GTID_NO && + flush_master_info(mi, TRUE, TRUE)) { sql_print_error("Failed to flush master info file"); goto err; @@ -3978,6 +3979,8 @@ err: mi->mysql=0; } write_ignored_events_info_to_relay_log(thd, mi); + if (mi->using_gtid != Master_info::USE_GTID_NO) + 
flush_master_info(mi, TRUE, TRUE); thd_proc_info(thd, "Slave io thread waiting for slave mutex on exit"); mysql_mutex_lock(&mi->run_lock); @@ -4462,6 +4465,8 @@ the slave SQL thread with \"SLAVE START\". We stopped at log \ thd->catalog= 0; thd->reset_query(); thd->reset_db(NULL, 0); + if (rli->mi->using_gtid != Master_info::USE_GTID_NO) + flush_relay_log_info(rli); thd_proc_info(thd, "Sql driver thread waiting for slave mutex on exit"); mysql_mutex_lock(&rli->run_lock); err_during_init: From 6a38b594759c41bd3d45ad89379ff38864bd4ba4 Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 25 Oct 2013 21:17:14 +0200 Subject: [PATCH 37/41] MDEV-5189: Incorrect parallel apply in parallel replication Two problems were fixed: 1. When not in GTID mode (master_use_gtid=no), then we must not apply events in different domains in parallel (in non-GTID mode we are not capable of restarting at different points in different domains). 2. When transactions B and C group commit together, but after and separate from A, we can apply B and C in parallel, but both B and C must not start until A has committed. Fix sub_id to be globally increasing (not just per-domain increasing) so that this wait (which is based on sub_id) can be done correctly. 
--- sql/rpl_gtid.cc | 13 ++++++------- sql/rpl_gtid.h | 4 +--- sql/rpl_parallel.cc | 4 +++- 3 files changed, 10 insertions(+), 11 deletions(-) diff --git a/sql/rpl_gtid.cc b/sql/rpl_gtid.cc index 1e393eab502..da90dcf641a 100644 --- a/sql/rpl_gtid.cc +++ b/sql/rpl_gtid.cc @@ -83,7 +83,7 @@ rpl_slave_state::record_and_update_gtid(THD *thd, rpl_group_info *rgi) rpl_slave_state::rpl_slave_state() - : inited(false), loaded(false) + : last_sub_id(0), inited(false), loaded(false) { my_hash_init(&hash, &my_charset_bin, 32, offsetof(element, domain_id), sizeof(uint32), NULL, my_free, HASH_UNIQUE); @@ -153,6 +153,9 @@ rpl_slave_state::update(uint32 domain_id, uint32 server_id, uint64 sub_id, list_elem->seq_no= seq_no; elem->add(list_elem); + if (last_sub_id < sub_id) + last_sub_id= sub_id; + return 0; } @@ -169,7 +172,6 @@ rpl_slave_state::get_element(uint32 domain_id) if (!(elem= (element *)my_malloc(sizeof(*elem), MYF(MY_WME)))) return NULL; elem->list= NULL; - elem->last_sub_id= 0; elem->domain_id= domain_id; if (my_hash_insert(&hash, (uchar *)elem)) { @@ -469,13 +471,10 @@ end: uint64 rpl_slave_state::next_subid(uint32 domain_id) { - uint32 sub_id= 0; - element *elem; + uint32 sub_id; lock(); - elem= get_element(domain_id); - if (elem) - sub_id= ++elem->last_sub_id; + sub_id= ++last_sub_id; unlock(); return sub_id; diff --git a/sql/rpl_gtid.h b/sql/rpl_gtid.h index 525b34cb160..39c9aee0b9d 100644 --- a/sql/rpl_gtid.h +++ b/sql/rpl_gtid.h @@ -60,7 +60,6 @@ struct rpl_slave_state struct element { struct list_element *list; - uint64 last_sub_id; uint32 domain_id; list_element *grab_list() { list_element *l= list; list= NULL; return l; } @@ -68,8 +67,6 @@ struct rpl_slave_state { l->next= list; list= l; - if (last_sub_id < l->sub_id) - last_sub_id= l->sub_id; } }; @@ -78,6 +75,7 @@ struct rpl_slave_state /* Mutex protecting access to the state. 
*/ mysql_mutex_t LOCK_slave_state; + uint64 last_sub_id; bool inited; bool loaded; diff --git a/sql/rpl_parallel.cc b/sql/rpl_parallel.cc index e1d8b3a2f0c..e65c543148e 100644 --- a/sql/rpl_parallel.cc +++ b/sql/rpl_parallel.cc @@ -657,8 +657,10 @@ rpl_parallel::do_event(rpl_group_info *serial_rgi, Log_event *ev, if (typ == GTID_EVENT) { Gtid_log_event *gtid_ev= static_cast(ev); + uint32 domain_id= (rli->mi->using_gtid == Master_info::USE_GTID_NO ? + 0 : gtid_ev->domain_id); - if (!(e= find(gtid_ev->domain_id)) || + if (!(e= find(domain_id)) || !(rgi= new rpl_group_info(rli)) || event_group_new_gtid(rgi, gtid_ev)) { From 2fbd1c730735cfd857b250e3afb909290ab4821d Mon Sep 17 00:00:00 2001 From: unknown Date: Mon, 28 Oct 2013 13:24:56 +0100 Subject: [PATCH 38/41] MDEV-4506: Parallel replication. MDEV-5189: Error handling in parallel replication. Fix error handling in parallel worker threads when a query fails: - Report the error to the error log. - Return the error back, and set rli->abort_slave. - Stop executing more events after the error. --- sql/rpl_parallel.cc | 19 ++--- sql/rpl_rli.cc | 8 +-- sql/rpl_rli.h | 1 + sql/slave.cc | 168 +++++++++++++++++++++++--------------------- sql/slave.h | 1 + 5 files changed, 103 insertions(+), 94 deletions(-) diff --git a/sql/rpl_parallel.cc b/sql/rpl_parallel.cc index e65c543148e..bbc917b6e9d 100644 --- a/sql/rpl_parallel.cc +++ b/sql/rpl_parallel.cc @@ -9,11 +9,6 @@ ToDo list: - - Error handling. If we fail in one of multiple parallel executions, we - need to make a best effort to complete prior transactions and roll back - following transactions, so slave binlog position will be correct. - And all the retry logic for temporary errors like deadlock. - - Retry of failed transactions is not yet implemented for the parallel case. - All the waits (eg. 
in struct wait_for_commit and in @@ -212,7 +207,7 @@ handle_rpl_parallel_thread(void *arg) processing between the event groups as a simple way to ensure that everything is stopped and cleaned up correctly. */ - if (!sql_worker_killed(thd, rgi, in_event_group)) + if (!rgi->is_error && !sql_worker_killed(thd, rgi, in_event_group)) err= rpt_handle_event(events, rpt); else err= thd->wait_for_prior_commit(); @@ -228,6 +223,13 @@ handle_rpl_parallel_thread(void *arg) delete_or_keep_event_post_apply(rgi, event_type, events->ev); my_free(events); + if (err) + { + rgi->is_error= true; + slave_output_error_info(rgi->rli, thd); + rgi->cleanup_context(thd, true); + rgi->rli->abort_slave= true; + } if (end_of_group) { in_event_group= false; @@ -785,6 +787,7 @@ rpl_parallel::do_event(rpl_group_info *serial_rgi, Log_event *ev, } else if (!is_group_event || !current) { + int err; /* Events like ROTATE and FORMAT_DESCRIPTION. Do not run in worker thread. Same for events not preceeded by GTID (we should not see those normally, @@ -802,11 +805,11 @@ rpl_parallel::do_event(rpl_group_info *serial_rgi, Log_event *ev, rev->new_log_ident, rev->ident_len+1); } - rpt_handle_event(qev, NULL); + err= rpt_handle_event(qev, NULL); delete_or_keep_event_post_apply(serial_rgi, typ, qev->ev); my_free(qev); - return false; + return (err != 0); } else { diff --git a/sql/rpl_rli.cc b/sql/rpl_rli.cc index ebbe5f4407c..b558f2db64c 100644 --- a/sql/rpl_rli.cc +++ b/sql/rpl_rli.cc @@ -1274,12 +1274,10 @@ void Relay_log_info::stmt_done(my_off_t event_master_log_pos, DBA aware of the problem in the error log. 
*/ } + DBUG_EXECUTE_IF("inject_crash_before_flush_rli", DBUG_SUICIDE();); if (mi->using_gtid == Master_info::USE_GTID_NO) - { - DBUG_EXECUTE_IF("inject_crash_before_flush_rli", DBUG_SUICIDE();); flush_relay_log_info(this); - DBUG_EXECUTE_IF("inject_crash_after_flush_rli", DBUG_SUICIDE();); - } + DBUG_EXECUTE_IF("inject_crash_after_flush_rli", DBUG_SUICIDE();); /* Note that Rotate_log_event::do_apply_event() does not call this function, so there is no chance that a fake rotate event resets @@ -1453,7 +1451,7 @@ rpl_group_info::rpl_group_info(Relay_log_info *rli_) wait_commit_group_info(0), wait_start_sub_id(0), parallel_entry(0), deferred_events(NULL), m_annotate_event(0), tables_to_lock(0), tables_to_lock_count(0), trans_retries(0), last_event_start_time(0), - is_parallel_exec(false), + is_parallel_exec(false), is_error(false), row_stmt_start_timestamp(0), long_find_row_note_printed(false) { bzero(¤t_gtid, sizeof(current_gtid)); diff --git a/sql/rpl_rli.h b/sql/rpl_rli.h index 38268ee85c5..2f049c41d0f 100644 --- a/sql/rpl_rli.h +++ b/sql/rpl_rli.h @@ -558,6 +558,7 @@ struct rpl_group_info */ char future_event_master_log_name[FN_REFLEN]; bool is_parallel_exec; + bool is_error; private: /* diff --git a/sql/slave.cc b/sql/slave.cc index fcc92f42536..acb42feb6e7 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -4078,6 +4078,92 @@ end: } +void +slave_output_error_info(Relay_log_info *rli, THD *thd) +{ + /* + retrieve as much info as possible from the thd and, error + codes and warnings and print this to the error log as to + allow the user to locate the error + */ + uint32 const last_errno= rli->last_error().number; + char llbuff[22]; + + if (thd->is_error()) + { + char const *const errmsg= thd->stmt_da->message(); + + DBUG_PRINT("info", + ("thd->stmt_da->sql_errno()=%d; rli->last_error.number=%d", + thd->stmt_da->sql_errno(), last_errno)); + if (last_errno == 0) + { + /* + This function is reporting an error which was not reported + while executing 
exec_relay_log_event(). + */ + rli->report(ERROR_LEVEL, thd->stmt_da->sql_errno(), "%s", errmsg); + } + else if (last_errno != thd->stmt_da->sql_errno()) + { + /* + * An error was reported while executing exec_relay_log_event() + * however the error code differs from what is in the thread. + * This function prints out more information to help finding + * what caused the problem. + */ + sql_print_error("Slave (additional info): %s Error_code: %d", + errmsg, thd->stmt_da->sql_errno()); + } + } + + /* Print any warnings issued */ + List_iterator_fast it(thd->warning_info->warn_list()); + MYSQL_ERROR *err; + /* + Added controlled slave thread cancel for replication + of user-defined variables. + */ + bool udf_error = false; + while ((err= it++)) + { + if (err->get_sql_errno() == ER_CANT_OPEN_LIBRARY) + udf_error = true; + sql_print_warning("Slave: %s Error_code: %d", err->get_message_text(), err->get_sql_errno()); + } + if (udf_error) + { + String tmp; + if (rli->mi->using_gtid != Master_info::USE_GTID_NO) + { + tmp.append(STRING_WITH_LEN("; GTID position '")); + rpl_append_gtid_state(&tmp, false); + tmp.append(STRING_WITH_LEN("'")); + } + sql_print_error("Error loading user-defined library, slave SQL " + "thread aborted. Install the missing library, and restart the " + "slave SQL thread with \"SLAVE START\". We stopped at log '%s' " + "position %s%s", RPL_LOG_NAME, llstr(rli->group_master_log_pos, + llbuff), tmp.c_ptr_safe()); + } + else + { + String tmp; + if (rli->mi->using_gtid != Master_info::USE_GTID_NO) + { + tmp.append(STRING_WITH_LEN("; GTID position '")); + rpl_append_gtid_state(&tmp, false); + tmp.append(STRING_WITH_LEN("'")); + } + sql_print_error("\ +Error running query, slave SQL thread aborted. Fix the problem, and restart \ +the slave SQL thread with \"SLAVE START\". We stopped at log \ +'%s' position %s%s", RPL_LOG_NAME, llstr(rli->group_master_log_pos, llbuff), + tmp.c_ptr_safe()); + } +} + + /** Slave SQL thread entry point. 
@@ -4335,87 +4421,7 @@ log '%s' at position %s, relay log '%s' position: %s%s", RPL_LOG_NAME, DBUG_PRINT("info", ("exec_relay_log_event() failed")); // do not scare the user if SQL thread was simply killed or stopped if (!sql_slave_killed(serial_rgi)) - { - /* - retrieve as much info as possible from the thd and, error - codes and warnings and print this to the error log as to - allow the user to locate the error - */ - uint32 const last_errno= rli->last_error().number; - - if (thd->is_error()) - { - char const *const errmsg= thd->stmt_da->message(); - - DBUG_PRINT("info", - ("thd->stmt_da->sql_errno()=%d; rli->last_error.number=%d", - thd->stmt_da->sql_errno(), last_errno)); - if (last_errno == 0) - { - /* - This function is reporting an error which was not reported - while executing exec_relay_log_event(). - */ - rli->report(ERROR_LEVEL, thd->stmt_da->sql_errno(), "%s", errmsg); - } - else if (last_errno != thd->stmt_da->sql_errno()) - { - /* - * An error was reported while executing exec_relay_log_event() - * however the error code differs from what is in the thread. - * This function prints out more information to help finding - * what caused the problem. - */ - sql_print_error("Slave (additional info): %s Error_code: %d", - errmsg, thd->stmt_da->sql_errno()); - } - } - - /* Print any warnings issued */ - List_iterator_fast it(thd->warning_info->warn_list()); - MYSQL_ERROR *err; - /* - Added controlled slave thread cancel for replication - of user-defined variables. 
- */ - bool udf_error = false; - while ((err= it++)) - { - if (err->get_sql_errno() == ER_CANT_OPEN_LIBRARY) - udf_error = true; - sql_print_warning("Slave: %s Error_code: %d", err->get_message_text(), err->get_sql_errno()); - } - if (udf_error) - { - String tmp; - if (mi->using_gtid != Master_info::USE_GTID_NO) - { - tmp.append(STRING_WITH_LEN("; GTID position '")); - rpl_append_gtid_state(&tmp, false); - tmp.append(STRING_WITH_LEN("'")); - } - sql_print_error("Error loading user-defined library, slave SQL " - "thread aborted. Install the missing library, and restart the " - "slave SQL thread with \"SLAVE START\". We stopped at log '%s' " - "position %s%s", RPL_LOG_NAME, llstr(rli->group_master_log_pos, - llbuff), tmp.c_ptr_safe()); - } - else - { - String tmp; - if (mi->using_gtid != Master_info::USE_GTID_NO) - { - tmp.append(STRING_WITH_LEN("; GTID position '")); - rpl_append_gtid_state(&tmp, false); - tmp.append(STRING_WITH_LEN("'")); - } - sql_print_error("\ -Error running query, slave SQL thread aborted. Fix the problem, and restart \ -the slave SQL thread with \"SLAVE START\". 
We stopped at log \ -'%s' position %s%s", RPL_LOG_NAME, llstr(rli->group_master_log_pos, llbuff), - tmp.c_ptr_safe()); - } - } + slave_output_error_info(rli, thd); goto err; } } diff --git a/sql/slave.h b/sql/slave.h index 4e64754a877..3981a9d4f2c 100644 --- a/sql/slave.h +++ b/sql/slave.h @@ -233,6 +233,7 @@ int apply_event_and_update_pos(Log_event* ev, THD* thd, rpl_parallel_thread *rpt); pthread_handler_t handle_slave_io(void *arg); +void slave_output_error_info(Relay_log_info *rli, THD *thd); pthread_handler_t handle_slave_sql(void *arg); bool net_request_file(NET* net, const char* fname); From f2799c68286c5742b5dbdeb65942494ff2ba38af Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 29 Oct 2013 11:52:16 +0100 Subject: [PATCH 39/41] MDEV-5195: Race when switching relay log causing crash In parallel replication, when the IO thread switches relay log, the SQL thread re-opens the current relaylog and seeks to the current position. There was a race that would cause it to sometimes seek to the wrong position, causing corruption and crash. --- sql/rpl_parallel.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/sql/rpl_parallel.cc b/sql/rpl_parallel.cc index bbc917b6e9d..97e115cc79f 100644 --- a/sql/rpl_parallel.cc +++ b/sql/rpl_parallel.cc @@ -835,6 +835,7 @@ rpl_parallel::do_event(rpl_group_info *serial_rgi, Log_event *ev, /* Queue the event for processing. */ + rli->event_relay_log_pos= rli->future_event_relay_log_pos; cur_thread->enqueue(qev); mysql_mutex_unlock(&cur_thread->LOCK_rpl_thread); mysql_cond_signal(&cur_thread->COND_rpl_thread); From 9c8da4ed762a4ad092e23cc07c34212320341ac1 Mon Sep 17 00:00:00 2001 From: unknown Date: Wed, 30 Oct 2013 07:52:30 +0100 Subject: [PATCH 40/41] MDEV-5196: Server hangs or assertion `!thd->wait_for_commit_ptr' fails on MASTER_POS_WAIT with slave-parallel-threads > 0 Fix a couple of issues in MDEV-4506, Parallel replication: - Missing mysql_cond_signal(), which could cause hangs. 
- Fix incorrect update of old-style replication position. - Change assertion to error handling (can trigger on manipulated/ corrupt binlog). --- sql/rpl_parallel.cc | 25 ++++++++++++++++++++++--- sql/rpl_rli.cc | 3 ++- 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/sql/rpl_parallel.cc b/sql/rpl_parallel.cc index 97e115cc79f..d62bec6e605 100644 --- a/sql/rpl_parallel.cc +++ b/sql/rpl_parallel.cc @@ -197,7 +197,19 @@ handle_rpl_parallel_thread(void *arg) mysql_mutex_unlock(&entry->LOCK_parallel_entry); } - DBUG_ASSERT(!thd->wait_for_commit_ptr); + if(thd->wait_for_commit_ptr) + { + /* + This indicates that we get a new GTID event in the middle of + a not completed event group. This is corrupt binlog (the master + will never write such binlog), so it does not happen unless + someone tries to inject wrong crafted binlog, but let us still + try to handle it somewhat nicely. + */ + rgi->cleanup_context(thd, true); + thd->wait_for_commit_ptr->unregister_wait_for_prior_commit(); + thd->wait_for_commit_ptr->wakeup_subsequent_commits(err); + } thd->wait_for_commit_ptr= &rgi->commit_orderer; } @@ -283,6 +295,7 @@ handle_rpl_parallel_thread(void *arg) */ rpt->dequeue(events); mysql_mutex_unlock(&rpt->LOCK_rpl_thread); + mysql_cond_signal(&rpt->COND_rpl_thread); goto more_events; } @@ -801,8 +814,14 @@ rpl_parallel::do_event(rpl_group_info *serial_rgi, Log_event *ev, if (typ == ROTATE_EVENT) { Rotate_log_event *rev= static_cast(qev->ev); - memcpy(rli->future_event_master_log_name, - rev->new_log_ident, rev->ident_len+1); + if ((rev->server_id != global_system_variables.server_id || + rli->replicate_same_server_id) && + !rev->is_relay_log_event() && + !rli->is_in_group()) + { + memcpy(rli->future_event_master_log_name, + rev->new_log_ident, rev->ident_len+1); + } } err= rpt_handle_event(qev, NULL); diff --git a/sql/rpl_rli.cc b/sql/rpl_rli.cc index b558f2db64c..e0fd8caa90e 100644 --- a/sql/rpl_rli.cc +++ b/sql/rpl_rli.cc @@ -902,8 +902,9 @@ void 
Relay_log_info::inc_group_relay_log_pos(ulonglong log_pos, { strcpy(group_master_log_name, rgi->future_event_master_log_name); notify_group_master_log_name_update(); + group_master_log_pos= log_pos; } - if (group_master_log_pos < log_pos) + else if (group_master_log_pos < log_pos) group_master_log_pos= log_pos; } } From 39df665a3332bd9bfb2529419f534a49cfac388c Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 31 Oct 2013 14:11:41 +0100 Subject: [PATCH 41/41] MDEV-5206: Incorrect slave old-style position in MDEV-4506, parallel replication. In parallel replication, there are two kinds of events which are executed in different ways. Normal events that are part of event groups/transactions are executed asynchronously by being queued for a worker thread. Other events like format description and rotate and such are executed directly in the driver SQL thread. If the direct execution of the other events were to update the old-style position, then the position gets updated too far ahead, before the normal events that have been queued for a worker thread have been executed. So this patch adds some special cases to prevent such position updates ahead of time, and instead queues dummy events for the worker threads, so that they will at an appropriate time do the position updates instead. (Also fix a race in a test case that happened to trigger while running tests for this patch).
--- mysql-test/suite/rpl/t/rpl_parallel.test | 3 + sql/log_event.cc | 16 ++-- sql/rpl_parallel.cc | 105 +++++++++++++++++++++-- sql/rpl_parallel.h | 1 + 4 files changed, 114 insertions(+), 11 deletions(-) diff --git a/mysql-test/suite/rpl/t/rpl_parallel.test b/mysql-test/suite/rpl/t/rpl_parallel.test index 89834b790d6..5709cab19c0 100644 --- a/mysql-test/suite/rpl/t/rpl_parallel.test +++ b/mysql-test/suite/rpl/t/rpl_parallel.test @@ -92,6 +92,7 @@ INSERT INTO t2 VALUES (foo(10, --connection server_2 FLUSH LOGS; +--source include/wait_for_binlog_checkpoint.inc SET sql_log_bin=0; --delimiter || CREATE FUNCTION foo(x INT, d1 VARCHAR(500), d2 VARCHAR(500)) @@ -148,6 +149,7 @@ SELECT * FROM t2 WHERE a >= 10 ORDER BY a; --let $binlog_file= slave-bin.000002 --source include/show_binlog_events.inc FLUSH LOGS; +--source include/wait_for_binlog_checkpoint.inc # Restart all the slave parallel worker threads, to clear all debug_sync actions. --connection server_2 @@ -161,6 +163,7 @@ SET debug_sync='RESET'; --echo *** Test that group-committed transactions on the master can replicate in parallel on the slave. *** --connection server_1 FLUSH LOGS; +--source include/wait_for_binlog_checkpoint.inc CREATE TABLE t3 (a INT PRIMARY KEY, b INT) ENGINE=InnoDB; # Create some sentinel rows so that the rows inserted in parallel fall into # separate gaps and do not cause gap lock conflicts. diff --git a/sql/log_event.cc b/sql/log_event.cc index e7c0506a50a..7ce6c203248 100644 --- a/sql/log_event.cc +++ b/sql/log_event.cc @@ -966,11 +966,17 @@ int Log_event::do_update_pos(rpl_group_info *rgi) if (debug_not_change_ts_if_art_event == 1 && is_artificial_event()) debug_not_change_ts_if_art_event= 0; ); - rli->stmt_done(log_pos, - (is_artificial_event() && - IF_DBUG(debug_not_change_ts_if_art_event > 0, 1) ? 
- 0 : when), - thd, rgi); + /* + In parallel execution, delay position update for the events that are + not part of event groups (format description, rotate, and such) until + the actual event execution reaches that point. + */ + if (!rgi->is_parallel_exec || is_group_event(get_type_code())) + rli->stmt_done(log_pos, + (is_artificial_event() && + IF_DBUG(debug_not_change_ts_if_art_event > 0, 1) ? + 0 : when), + thd, rgi); DBUG_EXECUTE_IF("let_first_flush_log_change_timestamp", if (debug_not_change_ts_if_art_event == 0) debug_not_change_ts_if_art_event= 2; ); diff --git a/sql/rpl_parallel.cc b/sql/rpl_parallel.cc index d62bec6e605..8328dd24128 100644 --- a/sql/rpl_parallel.cc +++ b/sql/rpl_parallel.cc @@ -56,6 +56,48 @@ rpt_handle_event(rpl_parallel_thread::queued_event *qev, } +static void +handle_queued_pos_update(THD *thd, rpl_parallel_thread::queued_event *qev) +{ + int cmp; + Relay_log_info *rli; + /* + Events that are not part of an event group, such as Format Description, + Stop, GTID List and such, are executed directly in the driver SQL thread, + to keep the relay log state up-to-date. But the associated position update + is done here, in sync with other normal events as they are queued to + worker threads. 
+ */ + if ((thd->variables.option_bits & OPTION_BEGIN) && + opt_using_transactions) + return; + rli= qev->rgi->rli; + mysql_mutex_lock(&rli->data_lock); + cmp= strcmp(rli->group_relay_log_name, qev->event_relay_log_name); + if (cmp < 0) + { + rli->group_relay_log_pos= qev->future_event_relay_log_pos; + strmake_buf(rli->group_relay_log_name, qev->event_relay_log_name); + rli->notify_group_relay_log_name_update(); + } else if (cmp == 0 && + rli->group_relay_log_pos < qev->future_event_relay_log_pos) + rli->group_relay_log_pos= qev->future_event_relay_log_pos; + + cmp= strcmp(rli->group_master_log_name, qev->future_event_master_log_name); + if (cmp < 0) + { + strcpy(rli->group_master_log_name, qev->future_event_master_log_name); + rli->notify_group_master_log_name_update(); + rli->group_master_log_pos= qev->future_event_master_log_pos; + } + else if (cmp == 0 + && rli->group_master_log_pos < qev->future_event_master_log_pos) + rli->group_master_log_pos= qev->future_event_master_log_pos; + mysql_mutex_unlock(&rli->data_lock); + mysql_cond_broadcast(&rli->data_cond); +} + + static bool sql_worker_killed(THD *thd, rpl_group_info *rgi, bool in_event_group) { @@ -142,16 +184,24 @@ handle_rpl_parallel_thread(void *arg) while (events) { struct rpl_parallel_thread::queued_event *next= events->next; - Log_event_type event_type= events->ev->get_type_code(); + Log_event_type event_type; rpl_group_info *rgi= events->rgi; rpl_parallel_entry *entry= rgi->parallel_entry; uint64 wait_for_sub_id; uint64 wait_start_sub_id; bool end_of_group; + if (!events->ev) + { + handle_queued_pos_update(thd, events); + my_free(events); + events= next; + continue; + } + err= 0; /* Handle a new event group, which will be initiated by a GTID event. 
*/ - if (event_type == GTID_EVENT) + if ((event_type= events->ev->get_type_code()) == GTID_EVENT) { in_event_group= true; /* @@ -794,13 +844,15 @@ rpl_parallel::do_event(rpl_group_info *serial_rgi, Log_event *ev, e->last_commit_id= 0; } - e->current_group_info= rgi; + qev->rgi= e->current_group_info= rgi; e->current_sub_id= rgi->gtid_sub_id; current= rgi->parallel_entry= e; } else if (!is_group_event || !current) { + my_off_t log_pos; int err; + bool tmp; /* Events like ROTATE and FORMAT_DESCRIPTION. Do not run in worker thread. Same for events not preceeded by GTID (we should not see those normally, @@ -824,11 +876,52 @@ rpl_parallel::do_event(rpl_group_info *serial_rgi, Log_event *ev, } } + tmp= serial_rgi->is_parallel_exec; + serial_rgi->is_parallel_exec= true; err= rpt_handle_event(qev, NULL); + serial_rgi->is_parallel_exec= tmp; + log_pos= qev->ev->log_pos; delete_or_keep_event_post_apply(serial_rgi, typ, qev->ev); - my_free(qev); - return (err != 0); + if (err) + { + my_free(qev); + return true; + } + qev->ev= NULL; + qev->future_event_master_log_pos= log_pos; + if (!current) + { + handle_queued_pos_update(rli->sql_driver_thd, qev); + my_free(qev); + return false; + } + /* + Queue an empty event, so that the position will be updated in a + reasonable way relative to other events: + + - If the currently executing events are queued serially for a single + thread, the position will only be updated when everything before has + completed. + + - If we are executing multiple independent events in parallel, then at + least the position will not be updated until one of them has reached + the current point. + */ + cur_thread= current->rpl_thread; + if (cur_thread) + { + mysql_mutex_lock(&cur_thread->LOCK_rpl_thread); + if (cur_thread->current_entry != current) + { + /* Not ours anymore, we need to grab a new one. 
*/ + mysql_mutex_unlock(&cur_thread->LOCK_rpl_thread); + cur_thread= NULL; + } + } + if (!cur_thread) + cur_thread= current->rpl_thread= + global_rpl_thread_pool.get_thread(current); } else { @@ -848,8 +941,8 @@ rpl_parallel::do_event(rpl_group_info *serial_rgi, Log_event *ev, cur_thread= current->rpl_thread= global_rpl_thread_pool.get_thread(current); } + qev->rgi= current->current_group_info; } - qev->rgi= current->current_group_info; /* Queue the event for processing. diff --git a/sql/rpl_parallel.h b/sql/rpl_parallel.h index fe9c6708e97..0b9619e5e83 100644 --- a/sql/rpl_parallel.h +++ b/sql/rpl_parallel.h @@ -27,6 +27,7 @@ struct rpl_parallel_thread { char event_relay_log_name[FN_REFLEN]; char future_event_master_log_name[FN_REFLEN]; ulonglong event_relay_log_pos; + my_off_t future_event_master_log_pos; size_t event_size; } *event_queue, *last_in_queue; uint64 queued_size;