MDEV-532: Async InnoDB commit checkpoint.
Make the commit checkpoint inside InnoDB asynchronous. Implement a background thread in binlog to do the writing and flushing of binlog checkpoint events to disk.
This commit is contained in:
parent
e97d6232f3
commit
40bbf697aa
@ -70,8 +70,14 @@ show binlog events in 'master-bin.000003' from <binlog_start>;
|
||||
Log_name Pos Event_type Server_id End_log_pos Info
|
||||
master-bin.000003 # Format_desc # # SERVER_VERSION, BINLOG_VERSION
|
||||
master-bin.000003 # Binlog_checkpoint # # master-bin.000001
|
||||
SET DEBUG_SYNC= "RESET";
|
||||
SET @old_dbug= @@global.DEBUG_DBUG;
|
||||
SET GLOBAL debug_dbug="+d,binlog_background_checkpoint_processed";
|
||||
SET DEBUG_SYNC= "now SIGNAL con2_continue";
|
||||
con1 is still pending, no new binlog checkpoint should have been logged.
|
||||
SET DEBUG_SYNC= "now WAIT_FOR binlog_background_checkpoint_processed";
|
||||
SET GLOBAL debug_dbug= @old_dbug;
|
||||
SET DEBUG_SYNC= "RESET";
|
||||
show binlog events in 'master-bin.000003' from <binlog_start>;
|
||||
Log_name Pos Event_type Server_id End_log_pos Info
|
||||
master-bin.000003 # Format_desc # # SERVER_VERSION, BINLOG_VERSION
|
||||
|
@ -118,7 +118,11 @@ master-bin.00000<binlog_start> # Table_map # # table_id: # (test.t1)
|
||||
master-bin.00000<binlog_start> # Write_rows # # table_id: # flags: STMT_END_F
|
||||
master-bin.00000<binlog_start> # Xid # # COMMIT /* XID */
|
||||
SET DEBUG_SYNC= "now SIGNAL con10_cont";
|
||||
SET @old_dbug= @@global.DEBUG_DBUG;
|
||||
SET GLOBAL debug_dbug="+d,binlog_background_checkpoint_processed";
|
||||
SET DEBUG_SYNC= "now SIGNAL con12_cont";
|
||||
SET DEBUG_SYNC= "now WAIT_FOR binlog_background_checkpoint_processed";
|
||||
SET GLOBAL debug_dbug= @old_dbug;
|
||||
SET DEBUG_SYNC= "now SIGNAL con11_cont";
|
||||
Checking that master-bin.000004 is the last binlog checkpoint
|
||||
show binlog events in 'master-bin.00000<binlog_start>' from <binlog_start>;
|
||||
|
@ -71,6 +71,12 @@ SET DEBUG_SYNC= "now WAIT_FOR con2_ready";
|
||||
--let $binlog_file= master-bin.000003
|
||||
--source include/show_binlog_events.inc
|
||||
|
||||
# We need to sync the test case with the background processing of the
|
||||
# commit checkpoint, otherwise we get nondeterministic results.
|
||||
SET DEBUG_SYNC= "RESET";
|
||||
SET @old_dbug= @@global.DEBUG_DBUG;
|
||||
SET GLOBAL debug_dbug="+d,binlog_background_checkpoint_processed";
|
||||
|
||||
SET DEBUG_SYNC= "now SIGNAL con2_continue";
|
||||
|
||||
connection con2;
|
||||
@ -78,6 +84,12 @@ reap;
|
||||
|
||||
connection default;
|
||||
--echo con1 is still pending, no new binlog checkpoint should have been logged.
|
||||
# Make sure commit checkpoint is processed before we check that no checkpoint
|
||||
# event has been binlogged.
|
||||
SET DEBUG_SYNC= "now WAIT_FOR binlog_background_checkpoint_processed";
|
||||
SET GLOBAL debug_dbug= @old_dbug;
|
||||
SET DEBUG_SYNC= "RESET";
|
||||
|
||||
--let $binlog_file= master-bin.000003
|
||||
--source include/show_binlog_events.inc
|
||||
|
||||
|
@ -14,8 +14,24 @@ CREATE TABLE t1 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb;
|
||||
# Insert some data to force a couple binlog rotations (3), so we get some
|
||||
# normal binlog checkpoints before starting the test.
|
||||
INSERT INTO t1 VALUES (100, REPEAT("x", 4100));
|
||||
# Wait for the master-bin.000002 binlog checkpoint to appear.
|
||||
--let $wait_for_all= 0
|
||||
--let $show_statement= SHOW BINLOG EVENTS IN "master-bin.000002"
|
||||
--let $field= Info
|
||||
--let $condition= = "master-bin.000002"
|
||||
--source include/wait_show_condition.inc
|
||||
INSERT INTO t1 VALUES (101, REPEAT("x", 4100));
|
||||
--let $wait_for_all= 0
|
||||
--let $show_statement= SHOW BINLOG EVENTS IN "master-bin.000003"
|
||||
--let $field= Info
|
||||
--let $condition= = "master-bin.000003"
|
||||
--source include/wait_show_condition.inc
|
||||
INSERT INTO t1 VALUES (102, REPEAT("x", 4100));
|
||||
--let $wait_for_all= 0
|
||||
--let $show_statement= SHOW BINLOG EVENTS IN "master-bin.000004"
|
||||
--let $field= Info
|
||||
--let $condition= = "master-bin.000004"
|
||||
--source include/wait_show_condition.inc
|
||||
|
||||
# Now start a bunch of transactions that span multiple binlog
|
||||
# files. Leave them in the state prepared-but-not-committed in the engine
|
||||
@ -153,10 +169,19 @@ SET DEBUG_SYNC= "now SIGNAL con10_cont";
|
||||
connection con10;
|
||||
reap;
|
||||
connection default;
|
||||
|
||||
# We need to sync the test case with the background processing of the
|
||||
# commit checkpoint, otherwise we get nondeterministic results.
|
||||
SET @old_dbug= @@global.DEBUG_DBUG;
|
||||
SET GLOBAL debug_dbug="+d,binlog_background_checkpoint_processed";
|
||||
|
||||
SET DEBUG_SYNC= "now SIGNAL con12_cont";
|
||||
connection con12;
|
||||
reap;
|
||||
connection default;
|
||||
SET DEBUG_SYNC= "now WAIT_FOR binlog_background_checkpoint_processed";
|
||||
SET GLOBAL debug_dbug= @old_dbug;
|
||||
|
||||
SET DEBUG_SYNC= "now SIGNAL con11_cont";
|
||||
connection con11;
|
||||
reap;
|
||||
@ -210,7 +235,20 @@ RESET MASTER;
|
||||
# crash recovery fails due to the error insert used for previous test.
|
||||
INSERT INTO t1 VALUES (21, REPEAT("x", 4100));
|
||||
INSERT INTO t1 VALUES (22, REPEAT("x", 4100));
|
||||
# Wait for the master-bin.000003 binlog checkpoint to appear.
|
||||
--let $wait_for_all= 0
|
||||
--let $show_statement= SHOW BINLOG EVENTS IN "master-bin.000003"
|
||||
--let $field= Info
|
||||
--let $condition= = "master-bin.000003"
|
||||
--source include/wait_show_condition.inc
|
||||
INSERT INTO t1 VALUES (23, REPEAT("x", 4100));
|
||||
# Wait for the last (master-bin.000004) binlog checkpoint to appear.
|
||||
--let $wait_for_all= 0
|
||||
--let $show_statement= SHOW BINLOG EVENTS IN "master-bin.000004"
|
||||
--let $field= Info
|
||||
--let $condition= = "master-bin.000004"
|
||||
--source include/wait_show_condition.inc
|
||||
|
||||
--write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
|
||||
wait-binlog_xa_recover.test
|
||||
EOF
|
||||
|
@ -76,6 +76,7 @@ wait/synch/mutex/sql/Master_info::run_lock
|
||||
wait/synch/mutex/sql/Master_info::sleep_lock
|
||||
wait/synch/mutex/sql/MDL_map::mutex
|
||||
wait/synch/mutex/sql/MDL_wait::LOCK_wait_status
|
||||
wait/synch/mutex/sql/MYSQL_BIN_LOG::LOCK_binlog_background_thread
|
||||
wait/synch/mutex/sql/MYSQL_BIN_LOG::LOCK_index
|
||||
wait/synch/mutex/sql/MYSQL_BIN_LOG::LOCK_xid_list
|
||||
wait/synch/mutex/sql/MYSQL_RELAY_LOG::LOCK_index
|
||||
@ -129,6 +130,8 @@ wait/synch/cond/sql/Master_info::sleep_cond
|
||||
wait/synch/cond/sql/Master_info::start_cond
|
||||
wait/synch/cond/sql/Master_info::stop_cond
|
||||
wait/synch/cond/sql/MDL_context::COND_wait_status
|
||||
wait/synch/cond/sql/MYSQL_BIN_LOG::COND_binlog_background_thread
|
||||
wait/synch/cond/sql/MYSQL_BIN_LOG::COND_binlog_background_thread_end
|
||||
wait/synch/cond/sql/MYSQL_BIN_LOG::COND_queue_busy
|
||||
wait/synch/cond/sql/MYSQL_BIN_LOG::COND_xid_list
|
||||
wait/synch/cond/sql/MYSQL_BIN_LOG::update_cond
|
||||
|
@ -56,8 +56,11 @@ where event_name like "%MYSQL_BIN_LOG%"
|
||||
and event_name not like "%MYSQL_BIN_LOG::update_cond"
|
||||
order by event_name;
|
||||
EVENT_NAME COUNT_STAR
|
||||
wait/synch/cond/sql/MYSQL_BIN_LOG::COND_binlog_background_thread NONE
|
||||
wait/synch/cond/sql/MYSQL_BIN_LOG::COND_binlog_background_thread_end NONE
|
||||
wait/synch/cond/sql/MYSQL_BIN_LOG::COND_queue_busy NONE
|
||||
wait/synch/cond/sql/MYSQL_BIN_LOG::COND_xid_list NONE
|
||||
wait/synch/mutex/sql/MYSQL_BIN_LOG::LOCK_binlog_background_thread MANY
|
||||
wait/synch/mutex/sql/MYSQL_BIN_LOG::LOCK_index MANY
|
||||
wait/synch/mutex/sql/MYSQL_BIN_LOG::LOCK_xid_list MANY
|
||||
"Expect no slave relay log"
|
||||
@ -131,8 +134,11 @@ where event_name like "%MYSQL_BIN_LOG%"
|
||||
and event_name not like "%MYSQL_BIN_LOG::update_cond"
|
||||
order by event_name;
|
||||
EVENT_NAME COUNT_STAR
|
||||
wait/synch/cond/sql/MYSQL_BIN_LOG::COND_binlog_background_thread MANY
|
||||
wait/synch/cond/sql/MYSQL_BIN_LOG::COND_binlog_background_thread_end NONE
|
||||
wait/synch/cond/sql/MYSQL_BIN_LOG::COND_queue_busy NONE
|
||||
wait/synch/cond/sql/MYSQL_BIN_LOG::COND_xid_list NONE
|
||||
wait/synch/cond/sql/MYSQL_BIN_LOG::COND_xid_list MANY
|
||||
wait/synch/mutex/sql/MYSQL_BIN_LOG::LOCK_binlog_background_thread MANY
|
||||
wait/synch/mutex/sql/MYSQL_BIN_LOG::LOCK_index MANY
|
||||
wait/synch/mutex/sql/MYSQL_BIN_LOG::LOCK_xid_list MANY
|
||||
"Expect a slave relay log"
|
||||
|
@ -984,6 +984,7 @@ static bool debug_sync_eval_action(THD *thd, char *action_str)
|
||||
DBUG_ENTER("debug_sync_eval_action");
|
||||
DBUG_ASSERT(thd);
|
||||
DBUG_ASSERT(action_str);
|
||||
DBUG_PRINT("debug_sync", ("action_str='%s'", action_str));
|
||||
|
||||
/*
|
||||
Get debug sync point name. Or a special command.
|
||||
|
167
sql/log.cc
167
sql/log.cc
@ -54,6 +54,7 @@
|
||||
#include "rpl_handler.h"
|
||||
#include "debug_sync.h"
|
||||
#include "sql_show.h"
|
||||
#include "my_pthread.h"
|
||||
|
||||
/* max size of the log message */
|
||||
#define MAX_LOG_BUFFER_SIZE 1024
|
||||
@ -107,6 +108,17 @@ static SHOW_VAR binlog_status_vars_detail[]=
|
||||
{NullS, NullS, SHOW_LONG}
|
||||
};
|
||||
|
||||
/*
|
||||
Variables for the binlog background thread.
|
||||
Protected by the MYSQL_BIN_LOG::LOCK_binlog_background_thread mutex.
|
||||
*/
|
||||
static bool binlog_background_thread_started= false;
|
||||
static bool binlog_background_thread_stop= false;
|
||||
static MYSQL_BIN_LOG::xid_count_per_binlog *
|
||||
binlog_background_thread_queue= NULL;
|
||||
|
||||
static bool start_binlog_background_thread();
|
||||
|
||||
|
||||
/**
|
||||
purge logs, master and slave sides both, related error code
|
||||
@ -2958,12 +2970,28 @@ void MYSQL_BIN_LOG::cleanup()
|
||||
my_free(b);
|
||||
}
|
||||
|
||||
/* Wait for the binlog background thread to stop. */
|
||||
if (!is_relay_log && binlog_background_thread_started)
|
||||
{
|
||||
mysql_mutex_lock(&LOCK_binlog_background_thread);
|
||||
binlog_background_thread_stop= true;
|
||||
mysql_cond_signal(&COND_binlog_background_thread);
|
||||
while (binlog_background_thread_stop)
|
||||
mysql_cond_wait(&COND_binlog_background_thread_end,
|
||||
&LOCK_binlog_background_thread);
|
||||
mysql_mutex_unlock(&LOCK_binlog_background_thread);
|
||||
binlog_background_thread_started= false;
|
||||
}
|
||||
|
||||
mysql_mutex_destroy(&LOCK_log);
|
||||
mysql_mutex_destroy(&LOCK_index);
|
||||
mysql_mutex_destroy(&LOCK_xid_list);
|
||||
mysql_mutex_destroy(&LOCK_binlog_background_thread);
|
||||
mysql_cond_destroy(&update_cond);
|
||||
mysql_cond_destroy(&COND_queue_busy);
|
||||
mysql_cond_destroy(&COND_xid_list);
|
||||
mysql_cond_destroy(&COND_binlog_background_thread);
|
||||
mysql_cond_destroy(&COND_binlog_background_thread_end);
|
||||
}
|
||||
DBUG_VOID_RETURN;
|
||||
}
|
||||
@ -2989,6 +3017,13 @@ void MYSQL_BIN_LOG::init_pthread_objects()
|
||||
mysql_cond_init(m_key_update_cond, &update_cond, 0);
|
||||
mysql_cond_init(m_key_COND_queue_busy, &COND_queue_busy, 0);
|
||||
mysql_cond_init(key_BINLOG_COND_xid_list, &COND_xid_list, 0);
|
||||
|
||||
mysql_mutex_init(key_BINLOG_LOCK_binlog_background_thread,
|
||||
&LOCK_binlog_background_thread, MY_MUTEX_INIT_FAST);
|
||||
mysql_cond_init(key_BINLOG_COND_binlog_background_thread,
|
||||
&COND_binlog_background_thread, 0);
|
||||
mysql_cond_init(key_BINLOG_COND_binlog_background_thread_end,
|
||||
&COND_binlog_background_thread_end, 0);
|
||||
}
|
||||
|
||||
|
||||
@ -3086,6 +3121,10 @@ bool MYSQL_BIN_LOG::open(const char *log_name,
|
||||
DBUG_ENTER("MYSQL_BIN_LOG::open");
|
||||
DBUG_PRINT("enter",("log_type: %d",(int) log_type_arg));
|
||||
|
||||
if (!is_relay_log && !binlog_background_thread_started &&
|
||||
start_binlog_background_thread())
|
||||
DBUG_RETURN(1);
|
||||
|
||||
if (init_and_set_log_file_name(log_name, new_name, log_type_arg,
|
||||
io_cache_type_arg))
|
||||
{
|
||||
@ -5541,11 +5580,7 @@ bool general_log_write(THD *thd, enum enum_server_command command,
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
I would like to make this function static, but this causes compiler warnings
|
||||
when it is declared as friend function in log.h.
|
||||
*/
|
||||
void
|
||||
static void
|
||||
binlog_checkpoint_callback(void *cookie)
|
||||
{
|
||||
MYSQL_BIN_LOG::xid_count_per_binlog *entry=
|
||||
@ -8135,9 +8170,129 @@ int TC_LOG_BINLOG::unlog(ulong cookie, my_xid xid)
|
||||
void
|
||||
TC_LOG_BINLOG::commit_checkpoint_notify(void *cookie)
|
||||
{
|
||||
mark_xid_done(((xid_count_per_binlog *)cookie)->binlog_id, true);
|
||||
xid_count_per_binlog *entry= static_cast<xid_count_per_binlog *>(cookie);
|
||||
mysql_mutex_lock(&LOCK_binlog_background_thread);
|
||||
entry->next_in_queue= binlog_background_thread_queue;
|
||||
binlog_background_thread_queue= entry;
|
||||
mysql_cond_signal(&COND_binlog_background_thread);
|
||||
mysql_mutex_unlock(&LOCK_binlog_background_thread);
|
||||
}
|
||||
|
||||
/*
|
||||
Binlog background thread.
|
||||
|
||||
This thread is used to log binlog checkpoints in the background, rather than
|
||||
in the context of random storage engine threads that happen to call
|
||||
commit_checkpoint_notify_ha() and may not like the delays while syncing
|
||||
binlog to disk or may not be setup with all my_thread_init() and other
|
||||
necessary stuff.
|
||||
|
||||
In the future, this thread could also be used to do log rotation in the
|
||||
background, which could eliminate all stalls around binlog rotations.
|
||||
*/
|
||||
pthread_handler_t
binlog_background_thread(void *arg __attribute__((unused)))
{
  bool shutdown_requested;
  MYSQL_BIN_LOG::xid_count_per_binlog *pending, *following;
  THD *thd;

  /* Give this thread its own per-thread state and THD before any work. */
  my_thread_init();
  thd= new THD;
  thd->system_thread= SYSTEM_THREAD_BINLOG_BACKGROUND;
  thd->thread_stack= (char*) &thd;              /* Set approximate stack start */
  /* The global thread_id counter is advanced under LOCK_thread_count. */
  mysql_mutex_lock(&LOCK_thread_count);
  thd->thread_id= thread_id++;
  mysql_mutex_unlock(&LOCK_thread_count);
  thd->store_globals();

  do
  {
    /*
      Sleep until either a stop has been requested or at least one
      checkpoint notification has been queued. Both shared variables are
      sampled while holding LOCK_binlog_background_thread.
    */
    thd_proc_info(thd, "Waiting for background binlog tasks");
    mysql_mutex_lock(&mysql_bin_log.LOCK_binlog_background_thread);
    shutdown_requested= binlog_background_thread_stop;
    pending= binlog_background_thread_queue;
    while (!shutdown_requested && !pending)
    {
      mysql_cond_wait(&mysql_bin_log.COND_binlog_background_thread,
                      &mysql_bin_log.LOCK_binlog_background_thread);
      shutdown_requested= binlog_background_thread_stop;
      pending= binlog_background_thread_queue;
    }
    /* Detach the whole list (possibly empty) so it is processed unlocked. */
    binlog_background_thread_queue= NULL;
    mysql_mutex_unlock(&mysql_bin_log.LOCK_binlog_background_thread);

    /* Handle every commit_checkpoint_notify() request that was queued. */
    for (; pending; pending= following)
    {
      thd_proc_info(thd, "Processing binlog checkpoint notification");
      /* Read the link before mark_xid_done(), which may free the element. */
      following= pending->next_in_queue;
      mysql_bin_log.mark_xid_done(pending->binlog_id, true);

      /* Debug builds only: let test cases wait for this processing step. */
      DBUG_EXECUTE_IF("binlog_background_checkpoint_processed",
        DBUG_ASSERT(!debug_sync_set_action(
          thd,
          STRING_WITH_LEN("now SIGNAL binlog_background_checkpoint_processed")));
        );
    }
    /* Requests grabbed together with the stop flag were processed above. */
  } while (!shutdown_requested);

  thd_proc_info(thd, "Stopping binlog background thread");

  mysql_mutex_lock(&LOCK_thread_count);
  delete thd;
  mysql_mutex_unlock(&LOCK_thread_count);

  my_thread_end();

  /*
    Signal that we are (almost) stopped: clear the stop flag and signal
    COND_binlog_background_thread_end, on which MYSQL_BIN_LOG::cleanup()
    waits for as long as the flag is still set.
  */
  mysql_mutex_lock(&mysql_bin_log.LOCK_binlog_background_thread);
  binlog_background_thread_stop= false;
  mysql_cond_signal(&mysql_bin_log.COND_binlog_background_thread_end);
  mysql_mutex_unlock(&mysql_bin_log.LOCK_binlog_background_thread);

  return 0;
}
|
||||
|
||||
#ifdef HAVE_PSI_INTERFACE
|
||||
static PSI_thread_key key_thread_binlog;
|
||||
|
||||
static PSI_thread_info all_binlog_threads[]=
|
||||
{
|
||||
{ &key_thread_binlog, "binlog_background", PSI_FLAG_GLOBAL},
|
||||
};
|
||||
#endif /* HAVE_PSI_INTERFACE */
|
||||
|
||||
static bool
|
||||
start_binlog_background_thread()
|
||||
{
|
||||
pthread_t th;
|
||||
|
||||
#ifdef HAVE_PSI_INTERFACE
|
||||
if (PSI_server)
|
||||
PSI_server->register_thread("sql", all_binlog_threads,
|
||||
array_elements(all_binlog_threads));
|
||||
#endif
|
||||
|
||||
if (mysql_thread_create(key_thread_binlog, &th, NULL,
|
||||
binlog_background_thread, NULL))
|
||||
return 1;
|
||||
|
||||
binlog_background_thread_started= true;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int TC_LOG_BINLOG::recover(LOG_INFO *linfo, const char *last_log_name,
|
||||
IO_CACHE *first_log,
|
||||
Format_description_log_event *fdle)
|
||||
|
55
sql/log.h
55
sql/log.h
@ -395,8 +395,6 @@ private:
|
||||
#define BINLOG_COOKIE_IS_DUMMY(c) \
|
||||
( ((ulong)(c)>>1) == BINLOG_COOKIE_DUMMY_ID )
|
||||
|
||||
void binlog_checkpoint_callback(void *cookie);
|
||||
|
||||
class binlog_cache_mngr;
|
||||
class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG
|
||||
{
|
||||
@ -450,27 +448,6 @@ class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG
|
||||
ulong binlog_id;
|
||||
};
|
||||
|
||||
/*
|
||||
A list of struct xid_count_per_binlog is used to keep track of how many
|
||||
XIDs are in prepared, but not committed, state in each binlog. And how
|
||||
many commit_checkpoint_request()'s are pending.
|
||||
|
||||
When count drops to zero in a binlog after rotation, it means that there
|
||||
are no more XIDs in prepared state, so that binlog is no longer needed
|
||||
for XA crash recovery, and we can log a new binlog checkpoint event.
|
||||
|
||||
The list is protected against simultaneous access from multiple
|
||||
threads by LOCK_xid_list.
|
||||
*/
|
||||
struct xid_count_per_binlog : public ilink {
|
||||
char *binlog_name;
|
||||
uint binlog_name_len;
|
||||
ulong binlog_id;
|
||||
/* Total prepared XIDs and pending checkpoint requests in this binlog. */
|
||||
long xid_count;
|
||||
xid_count_per_binlog(); /* Give link error if constructor used. */
|
||||
};
|
||||
I_List<xid_count_per_binlog> binlog_xid_count_list;
|
||||
/*
|
||||
When this is set, a RESET MASTER is in progress.
|
||||
|
||||
@ -480,7 +457,6 @@ class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG
|
||||
checkpoint arrives - when all have arrived, RESET MASTER will complete.
|
||||
*/
|
||||
bool reset_master_pending;
|
||||
friend void binlog_checkpoint_callback(void *cookie);
|
||||
|
||||
/* LOCK_log and LOCK_index are inited by init_pthread_objects() */
|
||||
mysql_mutex_t LOCK_index;
|
||||
@ -550,10 +526,35 @@ class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG
|
||||
int write_transaction_or_stmt(group_commit_entry *entry);
|
||||
bool write_transaction_to_binlog_events(group_commit_entry *entry);
|
||||
void trx_group_commit_leader(group_commit_entry *leader);
|
||||
void mark_xid_done(ulong cookie, bool write_checkpoint);
|
||||
void mark_xids_active(ulong cookie, uint xid_count);
|
||||
|
||||
public:
|
||||
/*
|
||||
A list of struct xid_count_per_binlog is used to keep track of how many
|
||||
XIDs are in prepared, but not committed, state in each binlog. And how
|
||||
many commit_checkpoint_request()'s are pending.
|
||||
|
||||
When count drops to zero in a binlog after rotation, it means that there
|
||||
are no more XIDs in prepared state, so that binlog is no longer needed
|
||||
for XA crash recovery, and we can log a new binlog checkpoint event.
|
||||
|
||||
The list is protected against simultaneous access from multiple
|
||||
threads by LOCK_xid_list.
|
||||
*/
|
||||
struct xid_count_per_binlog : public ilink {
|
||||
char *binlog_name;
|
||||
uint binlog_name_len;
|
||||
ulong binlog_id;
|
||||
/* Total prepared XIDs and pending checkpoint requests in this binlog. */
|
||||
long xid_count;
|
||||
/* For linking in requests to the binlog background thread. */
|
||||
xid_count_per_binlog *next_in_queue;
|
||||
xid_count_per_binlog(); /* Give link error if constructor used. */
|
||||
};
|
||||
I_List<xid_count_per_binlog> binlog_xid_count_list;
|
||||
mysql_mutex_t LOCK_binlog_background_thread;
|
||||
mysql_cond_t COND_binlog_background_thread;
|
||||
mysql_cond_t COND_binlog_background_thread_end;
|
||||
|
||||
using MYSQL_LOG::generate_name;
|
||||
using MYSQL_LOG::is_open;
|
||||
|
||||
@ -709,6 +710,8 @@ public:
|
||||
bool appendv(const char* buf,uint len,...);
|
||||
bool append(Log_event* ev);
|
||||
|
||||
void mark_xids_active(ulong cookie, uint xid_count);
|
||||
void mark_xid_done(ulong cookie, bool write_checkpoint);
|
||||
void make_log_name(char* buf, const char* log_ident);
|
||||
bool is_active(const char* log_file_name);
|
||||
bool can_purge_log(const char *log_file_name);
|
||||
|
@ -726,6 +726,7 @@ PSI_mutex_key key_LOCK_des_key_file;
|
||||
#endif /* HAVE_OPENSSL */
|
||||
|
||||
PSI_mutex_key key_BINLOG_LOCK_index, key_BINLOG_LOCK_xid_list,
|
||||
key_BINLOG_LOCK_binlog_background_thread,
|
||||
key_delayed_insert_mutex, key_hash_filo_lock, key_LOCK_active_mi,
|
||||
key_LOCK_connection_count, key_LOCK_crypt, key_LOCK_delayed_create,
|
||||
key_LOCK_delayed_insert, key_LOCK_delayed_status, key_LOCK_error_log,
|
||||
@ -768,6 +769,7 @@ static PSI_mutex_info all_server_mutexes[]=
|
||||
|
||||
{ &key_BINLOG_LOCK_index, "MYSQL_BIN_LOG::LOCK_index", 0},
|
||||
{ &key_BINLOG_LOCK_xid_list, "MYSQL_BIN_LOG::LOCK_xid_list", 0},
|
||||
{ &key_BINLOG_LOCK_binlog_background_thread, "MYSQL_BIN_LOG::LOCK_binlog_background_thread", 0},
|
||||
{ &key_RELAYLOG_LOCK_index, "MYSQL_RELAY_LOG::LOCK_index", 0},
|
||||
{ &key_delayed_insert_mutex, "Delayed_insert::mutex", 0},
|
||||
{ &key_hash_filo_lock, "hash_filo::lock", 0},
|
||||
@ -836,6 +838,8 @@ PSI_cond_key key_PAGE_cond, key_COND_active, key_COND_pool;
|
||||
#endif /* HAVE_MMAP */
|
||||
|
||||
PSI_cond_key key_BINLOG_COND_xid_list, key_BINLOG_update_cond,
|
||||
key_BINLOG_COND_binlog_background_thread,
|
||||
key_BINLOG_COND_binlog_background_thread_end,
|
||||
key_COND_cache_status_changed, key_COND_manager,
|
||||
key_COND_rpl_status, key_COND_server_started,
|
||||
key_delayed_insert_cond, key_delayed_insert_cond_client,
|
||||
@ -865,6 +869,8 @@ static PSI_cond_info all_server_conds[]=
|
||||
#endif /* HAVE_MMAP */
|
||||
{ &key_BINLOG_COND_xid_list, "MYSQL_BIN_LOG::COND_xid_list", 0},
|
||||
{ &key_BINLOG_update_cond, "MYSQL_BIN_LOG::update_cond", 0},
|
||||
{ &key_BINLOG_COND_binlog_background_thread, "MYSQL_BIN_LOG::COND_binlog_background_thread", 0},
|
||||
{ &key_BINLOG_COND_binlog_background_thread_end, "MYSQL_BIN_LOG::COND_binlog_background_thread_end", 0},
|
||||
{ &key_BINLOG_COND_queue_busy, "MYSQL_BIN_LOG::COND_queue_busy", 0},
|
||||
{ &key_RELAYLOG_update_cond, "MYSQL_RELAY_LOG::update_cond", 0},
|
||||
{ &key_RELAYLOG_COND_queue_busy, "MYSQL_RELAY_LOG::COND_queue_busy", 0},
|
||||
|
@ -228,6 +228,7 @@ extern PSI_mutex_key key_LOCK_des_key_file;
|
||||
#endif
|
||||
|
||||
extern PSI_mutex_key key_BINLOG_LOCK_index, key_BINLOG_LOCK_xid_list,
|
||||
key_BINLOG_LOCK_binlog_background_thread,
|
||||
key_delayed_insert_mutex, key_hash_filo_lock, key_LOCK_active_mi,
|
||||
key_LOCK_connection_count, key_LOCK_crypt, key_LOCK_delayed_create,
|
||||
key_LOCK_delayed_insert, key_LOCK_delayed_status, key_LOCK_error_log,
|
||||
@ -259,6 +260,8 @@ extern PSI_cond_key key_PAGE_cond, key_COND_active, key_COND_pool;
|
||||
#endif /* HAVE_MMAP */
|
||||
|
||||
extern PSI_cond_key key_BINLOG_COND_xid_list, key_BINLOG_update_cond,
|
||||
key_BINLOG_COND_binlog_background_thread,
|
||||
key_BINLOG_COND_binlog_background_thread_end,
|
||||
key_COND_cache_status_changed, key_COND_manager,
|
||||
key_COND_rpl_status, key_COND_server_started,
|
||||
key_delayed_insert_cond, key_delayed_insert_cond_client,
|
||||
|
@ -58,6 +58,7 @@ Relay_log_info::Relay_log_info(bool is_slave_recovery)
|
||||
{
|
||||
DBUG_ENTER("Relay_log_info::Relay_log_info");
|
||||
|
||||
relay_log.is_relay_log= TRUE;
|
||||
#ifdef HAVE_PSI_INTERFACE
|
||||
relay_log.set_psi_keys(key_RELAYLOG_LOCK_index,
|
||||
key_RELAYLOG_update_cond,
|
||||
@ -216,8 +217,6 @@ a file name for --relay-log-index option", opt_relaylog_index_name);
|
||||
&mi->connection_name);
|
||||
}
|
||||
|
||||
rli->relay_log.is_relay_log= TRUE;
|
||||
|
||||
/*
|
||||
note, that if open() fails, we'll still have index file open
|
||||
but a destructor will take care of that
|
||||
|
@ -1255,7 +1255,8 @@ enum enum_thread_type
|
||||
SYSTEM_THREAD_SLAVE_SQL= 4,
|
||||
SYSTEM_THREAD_NDBCLUSTER_BINLOG= 8,
|
||||
SYSTEM_THREAD_EVENT_SCHEDULER= 16,
|
||||
SYSTEM_THREAD_EVENT_WORKER= 32
|
||||
SYSTEM_THREAD_EVENT_WORKER= 32,
|
||||
SYSTEM_THREAD_BINLOG_BACKGROUND= 64
|
||||
};
|
||||
|
||||
inline char const *
|
||||
|
@ -106,6 +106,7 @@ static ulong commit_threads = 0;
|
||||
static mysql_mutex_t commit_threads_m;
|
||||
static mysql_cond_t commit_cond;
|
||||
static mysql_mutex_t commit_cond_m;
|
||||
static mysql_mutex_t pending_checkpoint_mutex;
|
||||
static bool innodb_inited = 0;
|
||||
|
||||
#define INSIDE_HA_INNOBASE_CC
|
||||
@ -222,11 +223,13 @@ static mysql_pfs_key_t innobase_share_mutex_key;
|
||||
static mysql_pfs_key_t commit_threads_m_key;
|
||||
static mysql_pfs_key_t commit_cond_mutex_key;
|
||||
static mysql_pfs_key_t commit_cond_key;
|
||||
static mysql_pfs_key_t pending_checkpoint_mutex_key;
|
||||
|
||||
static PSI_mutex_info all_pthread_mutexes[] = {
|
||||
{&commit_threads_m_key, "commit_threads_m", 0},
|
||||
{&commit_cond_mutex_key, "commit_cond_mutex", 0},
|
||||
{&innobase_share_mutex_key, "innobase_share_mutex", 0}
|
||||
{&innobase_share_mutex_key, "innobase_share_mutex", 0},
|
||||
{&pending_checkpoint_mutex_key, "pending_checkpoint_mutex", 0}
|
||||
};
|
||||
|
||||
static PSI_cond_info all_innodb_conds[] = {
|
||||
@ -2601,6 +2604,9 @@ innobase_change_buffering_inited_ok:
|
||||
mysql_mutex_init(commit_cond_mutex_key,
|
||||
&commit_cond_m, MY_MUTEX_INIT_FAST);
|
||||
mysql_cond_init(commit_cond_key, &commit_cond, NULL);
|
||||
mysql_mutex_init(pending_checkpoint_mutex_key,
|
||||
&pending_checkpoint_mutex,
|
||||
MY_MUTEX_INIT_FAST);
|
||||
innodb_inited= 1;
|
||||
#ifdef MYSQL_DYNAMIC_PLUGIN
|
||||
if (innobase_hton != p) {
|
||||
@ -2648,6 +2654,7 @@ innobase_end(
|
||||
mysql_mutex_destroy(&commit_threads_m);
|
||||
mysql_mutex_destroy(&commit_cond_m);
|
||||
mysql_cond_destroy(&commit_cond);
|
||||
mysql_mutex_destroy(&pending_checkpoint_mutex);
|
||||
}
|
||||
|
||||
DBUG_RETURN(err);
|
||||
@ -3017,17 +3024,145 @@ innobase_rollback_trx(
|
||||
DBUG_RETURN(convert_error_code_to_mysql(error, 0, NULL));
|
||||
}
|
||||
|
||||
|
||||
struct pending_checkpoint {
|
||||
struct pending_checkpoint *next;
|
||||
handlerton *hton;
|
||||
void *cookie;
|
||||
ib_uint64_t lsn;
|
||||
};
|
||||
static struct pending_checkpoint *pending_checkpoint_list;
|
||||
static struct pending_checkpoint *pending_checkpoint_list_end;
|
||||
|
||||
/*****************************************************************//**
|
||||
Handle a commit checkpoint request from server layer.
|
||||
We simply flush the redo log immediately and do the notify call.*/
|
||||
We put the request in a queue, so that we can notify upper layer about
|
||||
checkpoint complete when we have flushed the redo log.
|
||||
If we have already flushed all relevant redo log, we notify immediately.*/
|
||||
static
|
||||
void
|
||||
innobase_checkpoint_request(
|
||||
handlerton *hton,
|
||||
void *cookie)
|
||||
{
|
||||
log_buffer_flush_to_disk();
|
||||
commit_checkpoint_notify_ha(hton, cookie);
|
||||
ib_uint64_t lsn;
|
||||
ib_uint64_t flush_lsn;
|
||||
struct pending_checkpoint * entry;
|
||||
|
||||
/* Do the allocation outside of lock to reduce contention. The normal
|
||||
case is that not everything is flushed, so we will need to enqueue. */
|
||||
entry = static_cast<struct pending_checkpoint *>
|
||||
(my_malloc(sizeof(*entry), MYF(MY_WME)));
|
||||
if (!entry) {
|
||||
sql_print_error("Failed to allocate %u bytes."
|
||||
" Commit checkpoint will be skipped.",
|
||||
static_cast<unsigned>(sizeof(*entry)));
|
||||
return;
|
||||
}
|
||||
|
||||
entry->next = NULL;
|
||||
entry->hton = hton;
|
||||
entry->cookie = cookie;
|
||||
|
||||
mysql_mutex_lock(&pending_checkpoint_mutex);
|
||||
lsn = log_get_lsn();
|
||||
flush_lsn = log_get_flush_lsn();
|
||||
if (lsn > flush_lsn) {
|
||||
/* Put the request in queue.
|
||||
When the log gets flushed past the lsn, we will remove the
|
||||
entry from the queue and notify the upper layer. */
|
||||
entry->lsn = lsn;
|
||||
if (pending_checkpoint_list_end) {
|
||||
pending_checkpoint_list_end->next = entry;
|
||||
/* There is no need to order the entries in the list
|
||||
by lsn. The upper layer can accept notifications in
|
||||
any order, and short delays in notifications do not
|
||||
significantly impact performance. */
|
||||
} else {
|
||||
pending_checkpoint_list = entry;
|
||||
}
|
||||
pending_checkpoint_list_end = entry;
|
||||
entry = NULL;
|
||||
}
|
||||
mysql_mutex_unlock(&pending_checkpoint_mutex);
|
||||
|
||||
if (entry) {
|
||||
/* We are already flushed. Notify the checkpoint immediately. */
|
||||
commit_checkpoint_notify_ha(entry->hton, entry->cookie);
|
||||
my_free(entry);
|
||||
}
|
||||
}
|
||||
|
||||
/*****************************************************************//**
|
||||
Log code calls this whenever log has been written and/or flushed up
|
||||
to a new position. We use this to notify upper layer of a new commit
|
||||
checkpoint when necessary.*/
|
||||
extern "C" UNIV_INTERN
void
innobase_mysql_log_notify(
/*===============*/
	ib_uint64_t	write_lsn,	/*!< in: LSN written to log file */
	ib_uint64_t	flush_lsn)	/*!< in: LSN flushed to disk */
{
	struct pending_checkpoint *	head;
	struct pending_checkpoint *	cursor;
	struct pending_checkpoint *	last_flushed = NULL;
	bool				was_last;

	/* Cheap unlocked test for the common case of an empty list. If we
	race with a concurrent enqueue, the request is simply picked up by a
	later call. */
	if (!pending_checkpoint_list) {
		return;
	}

	mysql_mutex_lock(&pending_checkpoint_mutex);

	/* Walk from the head over the leading run of entries whose lsn has
	been flushed to disk, and stop at the first one that has not. The
	list is not kept ordered by lsn, so entries further along that were
	also flushed may be left for a later call; delaying those a bit is
	harmless. */
	head = pending_checkpoint_list;
	cursor = head;
	while (cursor != NULL && cursor->lsn <= flush_lsn) {
		last_flushed = cursor;
		cursor = cursor->next;
	}

	if (last_flushed) {
		/* Unlink the flushed prefix; cursor is the first entry that
		stays queued, or NULL when the whole list was consumed. */
		pending_checkpoint_list = cursor;
		if (!cursor) {
			pending_checkpoint_list_end = NULL;
		}
	}

	mysql_mutex_unlock(&pending_checkpoint_mutex);

	if (!last_flushed) {
		return;
	}

	/* With the mutex released, notify the upper layer about each
	unlinked entry from head through last_flushed, freeing as we go. */
	do {
		cursor = head;
		head = head->next;
		was_last = (cursor == last_flushed);

		commit_checkpoint_notify_ha(cursor->hton, cursor->cookie);

		my_free(cursor);
	} while (!was_last);
}
|
||||
|
||||
/*****************************************************************//**
|
||||
|
@ -136,6 +136,17 @@ innobase_mysql_print_thd(
|
||||
uint max_query_len); /*!< in: max query length to print, or 0 to
|
||||
use the default max length */
|
||||
|
||||
/*****************************************************************//**
|
||||
Log code calls this whenever log has been written and/or flushed up
|
||||
to a new position. We use this to notify upper layer of a new commit
|
||||
checkpoint when necessary.*/
|
||||
UNIV_INTERN
|
||||
void
|
||||
innobase_mysql_log_notify(
|
||||
/*===============*/
|
||||
ib_uint64_t write_lsn, /*!< in: LSN written to log file */
|
||||
ib_uint64_t flush_lsn); /*!< in: LSN flushed to disk */
|
||||
|
||||
/**************************************************************//**
|
||||
Converts a MySQL type to an InnoDB type. Note that this function returns
|
||||
the 'mtype' of InnoDB. InnoDB differentiates between MySQL's old <= 4.1
|
||||
|
@ -151,6 +151,13 @@ UNIV_INLINE
|
||||
ib_uint64_t
|
||||
log_get_lsn(void);
|
||||
/*=============*/
|
||||
/************************************************************//**
|
||||
Gets the last lsn that is fully flushed to disk.
|
||||
@return last flushed lsn */
|
||||
UNIV_INLINE
|
||||
ib_uint64_t
|
||||
log_get_flush_lsn(void);
|
||||
/*=============*/
|
||||
/****************************************************************
|
||||
Gets the log group capacity. It is OK to read the value without
|
||||
holding log_sys->mutex because it is constant.
|
||||
|
@ -411,6 +411,25 @@ log_get_lsn(void)
|
||||
return(lsn);
|
||||
}
|
||||
|
||||
/************************************************************//**
|
||||
Gets the last lsn that is fully flushed to disk.
|
||||
@return last flushed lsn */
|
||||
UNIV_INLINE
|
||||
ib_uint64_t
|
||||
log_get_flush_lsn(void)
|
||||
/*=============*/
|
||||
{
|
||||
ib_uint64_t lsn;
|
||||
|
||||
mutex_enter(&(log_sys->mutex));
|
||||
|
||||
lsn = log_sys->flushed_to_disk_lsn;
|
||||
|
||||
mutex_exit(&(log_sys->mutex));
|
||||
|
||||
return(lsn);
|
||||
}
|
||||
|
||||
/****************************************************************
|
||||
Gets the log group capacity. It is OK to read the value without
|
||||
holding log_sys->mutex because it is constant.
|
||||
|
@ -1353,6 +1353,8 @@ log_write_up_to(
|
||||
ulint loop_count = 0;
|
||||
#endif /* UNIV_DEBUG */
|
||||
ulint unlock;
|
||||
ib_uint64_t write_lsn;
|
||||
ib_uint64_t flush_lsn;
|
||||
|
||||
if (recv_no_ibuf_operations) {
|
||||
/* Recovery is running and no operations on the log files are
|
||||
@ -1530,8 +1532,13 @@ loop:
|
||||
|
||||
log_flush_do_unlocks(unlock);
|
||||
|
||||
write_lsn = log_sys->write_lsn;
|
||||
flush_lsn = log_sys->flushed_to_disk_lsn;
|
||||
|
||||
mutex_exit(&(log_sys->mutex));
|
||||
|
||||
innobase_mysql_log_notify(write_lsn, flush_lsn);
|
||||
|
||||
return;
|
||||
|
||||
do_waits:
|
||||
|
@ -121,6 +121,7 @@ static ulong commit_threads = 0;
|
||||
static mysql_mutex_t commit_threads_m;
|
||||
static mysql_cond_t commit_cond;
|
||||
static mysql_mutex_t commit_cond_m;
|
||||
static mysql_mutex_t pending_checkpoint_mutex;
|
||||
static bool innodb_inited = 0;
|
||||
|
||||
|
||||
@ -254,11 +255,13 @@ static mysql_pfs_key_t innobase_share_mutex_key;
|
||||
static mysql_pfs_key_t commit_threads_m_key;
|
||||
static mysql_pfs_key_t commit_cond_mutex_key;
|
||||
static mysql_pfs_key_t commit_cond_key;
|
||||
static mysql_pfs_key_t pending_checkpoint_mutex_key;
|
||||
|
||||
/* Performance schema registration data for the pthread mutexes used by
the handler layer: each entry pairs a PSI key with the name under which
the mutex is reported. */
static PSI_mutex_info	all_pthread_mutexes[] = {
	{&commit_threads_m_key, "commit_threads_m", 0},
	{&commit_cond_mutex_key, "commit_cond_mutex", 0},
	{&innobase_share_mutex_key, "innobase_share_mutex", 0},
	/* Protects the queue of pending commit checkpoint requests. */
	{&pending_checkpoint_mutex_key, "pending_checkpoint_mutex", 0}
};
|
||||
|
||||
static PSI_cond_info all_innodb_conds[] = {
|
||||
@ -3088,6 +3091,9 @@ skip_overwrite:
|
||||
mysql_mutex_init(commit_cond_mutex_key,
|
||||
&commit_cond_m, MY_MUTEX_INIT_FAST);
|
||||
mysql_cond_init(commit_cond_key, &commit_cond, NULL);
|
||||
mysql_mutex_init(pending_checkpoint_mutex_key,
|
||||
&pending_checkpoint_mutex,
|
||||
MY_MUTEX_INIT_FAST);
|
||||
innodb_inited= 1;
|
||||
#ifdef MYSQL_DYNAMIC_PLUGIN
|
||||
if (innobase_hton != p) {
|
||||
@ -3135,6 +3141,7 @@ innobase_end(
|
||||
mysql_mutex_destroy(&commit_threads_m);
|
||||
mysql_mutex_destroy(&commit_cond_m);
|
||||
mysql_cond_destroy(&commit_cond);
|
||||
mysql_mutex_destroy(&pending_checkpoint_mutex);
|
||||
}
|
||||
|
||||
DBUG_RETURN(err);
|
||||
@ -3530,17 +3537,145 @@ innobase_rollback_trx(
|
||||
DBUG_RETURN(convert_error_code_to_mysql(error, 0, NULL));
|
||||
}
|
||||
|
||||
|
||||
struct pending_checkpoint {
|
||||
struct pending_checkpoint *next;
|
||||
handlerton *hton;
|
||||
void *cookie;
|
||||
ib_uint64_t lsn;
|
||||
};
|
||||
static struct pending_checkpoint *pending_checkpoint_list;
|
||||
static struct pending_checkpoint *pending_checkpoint_list_end;
|
||||
|
||||
/*****************************************************************//**
|
||||
Handle a commit checkpoint request from server layer.
|
||||
We simply flush the redo log immediately and do the notify call.*/
|
||||
We put the request in a queue, so that we can notify upper layer about
|
||||
checkpoint complete when we have flushed the redo log.
|
||||
If we have already flushed all relevant redo log, we notify immediately.*/
|
||||
static
|
||||
void
|
||||
innobase_checkpoint_request(
|
||||
handlerton *hton,
|
||||
void *cookie)
|
||||
{
|
||||
log_buffer_flush_to_disk();
|
||||
commit_checkpoint_notify_ha(hton, cookie);
|
||||
ib_uint64_t lsn;
|
||||
ib_uint64_t flush_lsn;
|
||||
struct pending_checkpoint * entry;
|
||||
|
||||
/* Do the allocation outside of lock to reduce contention. The normal
|
||||
case is that not everything is flushed, so we will need to enqueue. */
|
||||
entry = static_cast<struct pending_checkpoint *>
|
||||
(my_malloc(sizeof(*entry), MYF(MY_WME)));
|
||||
if (!entry) {
|
||||
sql_print_error("Failed to allocate %u bytes."
|
||||
" Commit checkpoint will be skipped.",
|
||||
static_cast<unsigned>(sizeof(*entry)));
|
||||
return;
|
||||
}
|
||||
|
||||
entry->next = NULL;
|
||||
entry->hton = hton;
|
||||
entry->cookie = cookie;
|
||||
|
||||
mysql_mutex_lock(&pending_checkpoint_mutex);
|
||||
lsn = log_get_lsn();
|
||||
flush_lsn = log_get_flush_lsn();
|
||||
if (lsn > flush_lsn) {
|
||||
/* Put the request in queue.
|
||||
When the log gets flushed past the lsn, we will remove the
|
||||
entry from the queue and notify the upper layer. */
|
||||
entry->lsn = lsn;
|
||||
if (pending_checkpoint_list_end) {
|
||||
pending_checkpoint_list_end->next = entry;
|
||||
/* There is no need to order the entries in the list
|
||||
by lsn. The upper layer can accept notifications in
|
||||
any order, and short delays in notifications do not
|
||||
significantly impact performance. */
|
||||
} else {
|
||||
pending_checkpoint_list = entry;
|
||||
}
|
||||
pending_checkpoint_list_end = entry;
|
||||
entry = NULL;
|
||||
}
|
||||
mysql_mutex_unlock(&pending_checkpoint_mutex);
|
||||
|
||||
if (entry) {
|
||||
/* We are already flushed. Notify the checkpoint immediately. */
|
||||
commit_checkpoint_notify_ha(entry->hton, entry->cookie);
|
||||
my_free(entry);
|
||||
}
|
||||
}
|
||||
|
||||
/*****************************************************************//**
|
||||
Log code calls this whenever log has been written and/or flushed up
|
||||
to a new position. We use this to notify upper layer of a new commit
|
||||
checkpoint when necessary.*/
|
||||
extern "C" UNIV_INTERN
|
||||
void
|
||||
innobase_mysql_log_notify(
|
||||
/*===============*/
|
||||
ib_uint64_t write_lsn, /*!< in: LSN written to log file */
|
||||
ib_uint64_t flush_lsn) /*!< in: LSN flushed to disk */
|
||||
{
|
||||
struct pending_checkpoint * pending;
|
||||
struct pending_checkpoint * entry;
|
||||
struct pending_checkpoint * last_ready;
|
||||
|
||||
/* It is safe to do a quick check for NULL first without lock.
|
||||
Even if we should race, we will at most skip one checkpoint and
|
||||
take the next one, which is harmless. */
|
||||
if (!pending_checkpoint_list)
|
||||
return;
|
||||
|
||||
mysql_mutex_lock(&pending_checkpoint_mutex);
|
||||
pending = pending_checkpoint_list;
|
||||
if (!pending)
|
||||
{
|
||||
mysql_mutex_unlock(&pending_checkpoint_mutex);
|
||||
return;
|
||||
}
|
||||
|
||||
last_ready = NULL;
|
||||
for (entry = pending; entry != NULL; entry = entry -> next)
|
||||
{
|
||||
/* Notify checkpoints up until the first entry that has not
|
||||
been fully flushed to the redo log. Since we do not maintain
|
||||
the list ordered, in principle there could be more entries
|
||||
later than were also flushed. But there is no harm in
|
||||
delaying notifications for those a bit. And in practise, the
|
||||
list is unlikely to have more than one element anyway, as we
|
||||
flush the redo log at least once every second. */
|
||||
if (entry->lsn > flush_lsn)
|
||||
break;
|
||||
last_ready = entry;
|
||||
}
|
||||
|
||||
if (last_ready)
|
||||
{
|
||||
/* We found some pending checkpoints that are now flushed to
|
||||
disk. So remove them from the list. */
|
||||
pending_checkpoint_list = entry;
|
||||
if (!entry)
|
||||
pending_checkpoint_list_end = NULL;
|
||||
}
|
||||
|
||||
mysql_mutex_unlock(&pending_checkpoint_mutex);
|
||||
|
||||
if (!last_ready)
|
||||
return;
|
||||
|
||||
/* Now that we have released the lock, notify upper layer about all
|
||||
commit checkpoints that have now completed. */
|
||||
for (;;) {
|
||||
entry = pending;
|
||||
pending = pending->next;
|
||||
|
||||
commit_checkpoint_notify_ha(entry->hton, entry->cookie);
|
||||
|
||||
my_free(entry);
|
||||
if (entry == last_ready)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/*****************************************************************//**
|
||||
|
@ -136,6 +136,17 @@ innobase_mysql_print_thd(
|
||||
uint max_query_len); /*!< in: max query length to print, or 0 to
|
||||
use the default max length */
|
||||
|
||||
/*****************************************************************//**
|
||||
Log code calls this whenever log has been written and/or flushed up
|
||||
to a new position. We use this to notify upper layer of a new commit
|
||||
checkpoint when necessary.*/
|
||||
UNIV_INTERN
|
||||
void
|
||||
innobase_mysql_log_notify(
|
||||
/*===============*/
|
||||
ib_uint64_t write_lsn, /*!< in: LSN written to log file */
|
||||
ib_uint64_t flush_lsn); /*!< in: LSN flushed to disk */
|
||||
|
||||
/**************************************************************//**
|
||||
Converts a MySQL type to an InnoDB type. Note that this function returns
|
||||
the 'mtype' of InnoDB. InnoDB differentiates between MySQL's old <= 4.1
|
||||
|
@ -151,6 +151,13 @@ UNIV_INLINE
|
||||
ib_uint64_t
|
||||
log_get_lsn(void);
|
||||
/*=============*/
|
||||
/************************************************************//**
|
||||
Gets the last lsn that is fully flushed to disk.
|
||||
@return last flushed lsn */
|
||||
UNIV_INLINE
|
||||
ib_uint64_t
|
||||
log_get_flush_lsn(void);
|
||||
/*=============*/
|
||||
/****************************************************************
|
||||
Gets the log group capacity. It is OK to read the value without
|
||||
holding log_sys->mutex because it is constant.
|
||||
|
@ -411,6 +411,25 @@ log_get_lsn(void)
|
||||
return(lsn);
|
||||
}
|
||||
|
||||
/************************************************************//**
|
||||
Gets the last lsn that is fully flushed to disk.
|
||||
@return last flushed lsn */
|
||||
UNIV_INLINE
|
||||
ib_uint64_t
|
||||
log_get_flush_lsn(void)
|
||||
/*=============*/
|
||||
{
|
||||
ib_uint64_t lsn;
|
||||
|
||||
mutex_enter(&(log_sys->mutex));
|
||||
|
||||
lsn = log_sys->flushed_to_disk_lsn;
|
||||
|
||||
mutex_exit(&(log_sys->mutex));
|
||||
|
||||
return(lsn);
|
||||
}
|
||||
|
||||
/****************************************************************
|
||||
Gets the log group capacity. It is OK to read the value without
|
||||
holding log_sys->mutex because it is constant.
|
||||
|
@ -1390,6 +1390,8 @@ log_write_up_to(
|
||||
ulint loop_count = 0;
|
||||
#endif /* UNIV_DEBUG */
|
||||
ulint unlock;
|
||||
ib_uint64_t write_lsn;
|
||||
ib_uint64_t flush_lsn;
|
||||
|
||||
if (recv_no_ibuf_operations) {
|
||||
/* Recovery is running and no operations on the log files are
|
||||
@ -1568,8 +1570,13 @@ loop:
|
||||
|
||||
log_flush_do_unlocks(unlock);
|
||||
|
||||
write_lsn = log_sys->write_lsn;
|
||||
flush_lsn = log_sys->flushed_to_disk_lsn;
|
||||
|
||||
mutex_exit(&(log_sys->mutex));
|
||||
|
||||
innobase_mysql_log_notify(write_lsn, flush_lsn);
|
||||
|
||||
return;
|
||||
|
||||
do_waits:
|
||||
|
Loading…
x
Reference in New Issue
Block a user