MDEV-36234: Add innodb_linux_aio
This controls which Linux implementation to use for innodb_use_native_aio=ON. innodb_linux_aio=auto is equivalent to innodb_linux_aio=io_uring when it is available, falling back to innodb_linux_aio=aio when it is not. Debian packaging is no longer exclusively aio or uring, so for those older Debian or Ubuntu releases, it's a remove_uring directive. For more recent releases, add mandatory liburing for consistent packaging. WITH_LIBAIO is now an option independent of WITH_URING. is_linux_native_aio_supported(): Remove. This had originally been added in mysql/mysql-server@0da310b69d in 2012 to fix an issue where io_submit() on CentOS 5.5 would return EINVAL for a /tmp/#sql*.ibd file associated with CREATE TEMPORARY TABLE. But, starting with commit 2e814d4702d71a04388386a9f591d14a35980bfe, InnoDB temporary tables are written to innodb_temp_data_file_path. The 2012 commit said that the error could occur on "old kernels". Any GNU/Linux distribution that we currently support should be based on a newer Linux kernel; for example, Red Hat Enterprise Linux 7 was released in 2014. This is joint work with Daniel Black and Vladislav Vaintroub.
This commit is contained in:
parent
db188083c3
commit
585531d6c0
13
debian/autobake-deb.sh
vendored
13
debian/autobake-deb.sh
vendored
@ -64,11 +64,10 @@ add_lsb_base_depends()
|
||||
sed -e 's#lsof #lsb-base (>= 3.0-10),\n lsof #' -i debian/control
|
||||
}
|
||||
|
||||
replace_uring_with_aio()
|
||||
remove_uring()
|
||||
{
|
||||
sed 's/liburing-dev/libaio-dev/g' -i debian/control
|
||||
sed -e '/-DIGNORE_AIO_CHECK=ON/d' \
|
||||
-e '/-DWITH_URING=ON/d' -i debian/rules
|
||||
sed -e '/liburing-dev/d' -i debian/control
|
||||
sed -e '/-DWITH_URING=ON/d' -i debian/rules
|
||||
}
|
||||
|
||||
disable_libfmt()
|
||||
@ -116,7 +115,7 @@ in
|
||||
# Debian
|
||||
"buster")
|
||||
disable_libfmt
|
||||
replace_uring_with_aio
|
||||
remove_uring
|
||||
;&
|
||||
"bullseye")
|
||||
add_lsb_base_depends
|
||||
@ -127,7 +126,7 @@ in
|
||||
# so no removal is necessary.
|
||||
if [[ ! "$architecture" =~ amd64|arm64|armel|armhf|i386|mips64el|mipsel|ppc64el|s390x ]]
|
||||
then
|
||||
replace_uring_with_aio
|
||||
remove_uring
|
||||
fi
|
||||
;&
|
||||
"trixie"|"sid")
|
||||
@ -136,8 +135,8 @@ in
|
||||
;;
|
||||
# Ubuntu
|
||||
"focal")
|
||||
replace_uring_with_aio
|
||||
disable_libfmt
|
||||
remove_uring
|
||||
;&
|
||||
"jammy"|"kinetic")
|
||||
add_lsb_base_depends
|
||||
|
5
debian/rules
vendored
5
debian/rules
vendored
@ -87,9 +87,6 @@ endif
|
||||
# quality standards in Debian. Also building it requires an extra 4 GB of disk
|
||||
# space which makes native Debian builds fail as the total disk space needed
|
||||
# for MariaDB becomes over 10 GB. Only build CS via autobake-deb.sh.
|
||||
#
|
||||
# Note: Don't use '-DWITH_URING=ON' as some Buildbot builders are missing it
|
||||
# and would fail permanently.
|
||||
PATH=$${MYSQL_BUILD_PATH:-"/usr/lib/ccache:/usr/local/bin:/usr/bin:/bin"} \
|
||||
dh_auto_configure --builddirectory=$(BUILDDIR) -- \
|
||||
-DCMAKE_BUILD_TYPE=RelWithDebInfo \
|
||||
@ -103,6 +100,8 @@ endif
|
||||
-DPLUGIN_AWS_KEY_MANAGEMENT=NO \
|
||||
-DPLUGIN_COLUMNSTORE=NO \
|
||||
-DIGNORE_AIO_CHECK=ON \
|
||||
-DWITH_URING=ON \
|
||||
-DWITH_LIBAIO=ON \
|
||||
-DDEB=$(DEB_VENDOR)
|
||||
|
||||
# This is needed, otherwise 'make test' will run before binaries have been built
|
||||
|
@ -378,6 +378,10 @@ extern const char *innodb_checksum_algorithm_names[];
|
||||
extern TYPELIB innodb_checksum_algorithm_typelib;
|
||||
extern const char *innodb_flush_method_names[];
|
||||
extern TYPELIB innodb_flush_method_typelib;
|
||||
#ifdef __linux__
|
||||
extern const char *innodb_linux_aio_names[];
|
||||
extern TYPELIB innodb_linux_aio_typelib;
|
||||
#endif
|
||||
|
||||
static const char *binlog_info_values[] = {"off", "lockless", "on", "auto",
|
||||
NullS};
|
||||
@ -1334,6 +1338,9 @@ enum options_xtrabackup
|
||||
OPT_INNODB_READ_IO_THREADS,
|
||||
OPT_INNODB_WRITE_IO_THREADS,
|
||||
OPT_INNODB_USE_NATIVE_AIO,
|
||||
#ifdef __linux__
|
||||
OPT_INNODB_LINUX_AIO,
|
||||
#endif
|
||||
OPT_INNODB_PAGE_SIZE,
|
||||
OPT_INNODB_BUFFER_POOL_FILENAME,
|
||||
OPT_INNODB_LOCK_WAIT_TIMEOUT,
|
||||
@ -1934,6 +1941,14 @@ struct my_option xb_server_options[] =
|
||||
(G_PTR*) &srv_use_native_aio,
|
||||
(G_PTR*) &srv_use_native_aio, 0, GET_BOOL, NO_ARG,
|
||||
TRUE, 0, 0, 0, 0, 0},
|
||||
#ifdef __linux__
|
||||
{"innodb_linux_aio", OPT_INNODB_LINUX_AIO,
|
||||
"Which linux AIO implementation to use, auto (io_uring, failing to aio) or explicit",
|
||||
(G_PTR*) &srv_linux_aio_method,
|
||||
(G_PTR*) &srv_linux_aio_method,
|
||||
&innodb_linux_aio_typelib, GET_ENUM, REQUIRED_ARG,
|
||||
SRV_LINUX_AIO_AUTO, 0, 0, 0, 0, 0},
|
||||
#endif
|
||||
{"innodb_page_size", OPT_INNODB_PAGE_SIZE,
|
||||
"The universal page size of the database.",
|
||||
(G_PTR*) &innobase_page_size, (G_PTR*) &innobase_page_size, 0,
|
||||
@ -2529,19 +2544,8 @@ static bool innodb_init_param()
|
||||
|
||||
ut_ad(DATA_MYSQL_BINARY_CHARSET_COLL == my_charset_bin.number);
|
||||
|
||||
#ifdef _WIN32
|
||||
#if defined(_WIN32) || defined(LINUX_NATIVE_AIO) || defined(HAVE_URING)
|
||||
srv_use_native_aio = TRUE;
|
||||
|
||||
#elif defined(LINUX_NATIVE_AIO)
|
||||
|
||||
if (srv_use_native_aio) {
|
||||
msg("InnoDB: Using Linux native AIO");
|
||||
}
|
||||
#elif defined(HAVE_URING)
|
||||
|
||||
if (srv_use_native_aio) {
|
||||
msg("InnoDB: Using liburing");
|
||||
}
|
||||
#else
|
||||
/* Currently native AIO is supported only on windows and linux
|
||||
and that also when the support is compiled in. In all other
|
||||
@ -5473,7 +5477,6 @@ fail:
|
||||
xb_fil_io_init();
|
||||
|
||||
if (os_aio_init()) {
|
||||
msg("Error: cannot initialize AIO subsystem");
|
||||
goto fail;
|
||||
}
|
||||
|
||||
|
@ -4548,7 +4548,7 @@ sub extract_warning_lines ($$) {
|
||||
qr|InnoDB: io_setup\(\) attempt|,
|
||||
qr|InnoDB: io_setup\(\) failed with EAGAIN|,
|
||||
qr|io_uring_queue_init\(\) failed with|,
|
||||
qr|InnoDB: liburing disabled|,
|
||||
qr|InnoDB: io_uring failed: falling back to libaio|,
|
||||
qr/InnoDB: Failed to set O_DIRECT on file/,
|
||||
qr|setrlimit could not change the size of core files to 'infinity';|,
|
||||
qr|failed to retrieve the MAC address|,
|
||||
|
21
mysql-test/suite/sys_vars/r/innodb_linux_aio_basic.result
Normal file
21
mysql-test/suite/sys_vars/r/innodb_linux_aio_basic.result
Normal file
@ -0,0 +1,21 @@
|
||||
select @@global.innodb_linux_aio;
|
||||
@@global.innodb_linux_aio
|
||||
auto
|
||||
select @@session.innodb_linux_aio;
|
||||
ERROR HY000: Variable 'innodb_linux_aio' is a GLOBAL variable
|
||||
show global variables like 'innodb_linux_aio';
|
||||
Variable_name Value
|
||||
innodb_linux_aio auto
|
||||
show session variables like 'innodb_linux_aio';
|
||||
Variable_name Value
|
||||
innodb_linux_aio auto
|
||||
select * from information_schema.global_variables where variable_name='innodb_linux_aio';
|
||||
VARIABLE_NAME VARIABLE_VALUE
|
||||
INNODB_LINUX_AIO auto
|
||||
select * from information_schema.session_variables where variable_name='innodb_linux_aio';
|
||||
VARIABLE_NAME VARIABLE_VALUE
|
||||
INNODB_LINUX_AIO auto
|
||||
set global innodb_linux_aio='auto';
|
||||
ERROR HY000: Variable 'innodb_linux_aio' is a read only variable
|
||||
set session innodb_linux_aio='aio';
|
||||
ERROR HY000: Variable 'innodb_linux_aio' is a read only variable
|
@ -5,6 +5,7 @@ variable_name not in (
|
||||
'innodb_evict_tables_on_commit_debug', # one may want to override this
|
||||
'innodb_use_native_aio', # default value depends on OS
|
||||
'innodb_log_file_buffering', # only available on Linux and Windows
|
||||
'innodb_linux_aio', # existence depends on OS
|
||||
'innodb_buffer_pool_load_pages_abort') # debug build only, and is only for testing
|
||||
order by variable_name;
|
||||
VARIABLE_NAME INNODB_ADAPTIVE_FLUSHING
|
||||
|
23
mysql-test/suite/sys_vars/t/innodb_linux_aio_basic.test
Normal file
23
mysql-test/suite/sys_vars/t/innodb_linux_aio_basic.test
Normal file
@ -0,0 +1,23 @@
|
||||
--source include/have_innodb.inc
|
||||
--source include/linux.inc
|
||||
# enum readonly
|
||||
|
||||
#
|
||||
# show values;
|
||||
#
|
||||
select @@global.innodb_linux_aio;
|
||||
--error ER_INCORRECT_GLOBAL_LOCAL_VAR
|
||||
select @@session.innodb_linux_aio;
|
||||
show global variables like 'innodb_linux_aio';
|
||||
show session variables like 'innodb_linux_aio';
|
||||
select * from information_schema.global_variables where variable_name='innodb_linux_aio';
|
||||
select * from information_schema.session_variables where variable_name='innodb_linux_aio';
|
||||
|
||||
#
|
||||
# show that it's read-only
|
||||
#
|
||||
--error ER_INCORRECT_GLOBAL_LOCAL_VAR
|
||||
set global innodb_linux_aio='auto';
|
||||
--error ER_INCORRECT_GLOBAL_LOCAL_VAR
|
||||
set session innodb_linux_aio='aio';
|
||||
|
@ -16,5 +16,6 @@ select VARIABLE_NAME, SESSION_VALUE, DEFAULT_VALUE, VARIABLE_SCOPE, VARIABLE_TYP
|
||||
'innodb_evict_tables_on_commit_debug', # one may want to override this
|
||||
'innodb_use_native_aio', # default value depends on OS
|
||||
'innodb_log_file_buffering', # only available on Linux and Windows
|
||||
'innodb_linux_aio', # existence depends on OS
|
||||
'innodb_buffer_pool_load_pages_abort') # debug build only, and is only for testing
|
||||
order by variable_name;
|
||||
|
@ -315,6 +315,25 @@ static TYPELIB innodb_stats_method_typelib = {
|
||||
NULL
|
||||
};
|
||||
|
||||
/** Possible values for system variable "innodb_linux_aio" */
|
||||
#ifdef __linux__
|
||||
const char* innodb_linux_aio_names[] = {
|
||||
"auto", /* SRV_LINUX_AIO_AUTO */
|
||||
"io_uring", /* SRV_LINUX_AIO_IO_URING */
|
||||
"aio", /* SRV_LINUX_AIO_LIBAIO */
|
||||
NullS
|
||||
};
|
||||
|
||||
/** Used to define an enumerated type of the system variable
|
||||
innodb_linux_aio. Used by mariadb-backup too. */
|
||||
TYPELIB innodb_linux_aio_typelib = {
|
||||
array_elements(innodb_linux_aio_names) - 1,
|
||||
"innodb_linux_aio_typelib",
|
||||
innodb_linux_aio_names,
|
||||
NULL
|
||||
};
|
||||
#endif
|
||||
|
||||
/** Possible values of the parameter innodb_checksum_algorithm */
|
||||
const char* innodb_checksum_algorithm_names[] = {
|
||||
"crc32",
|
||||
@ -19664,6 +19683,15 @@ static MYSQL_SYSVAR_BOOL(use_native_aio, srv_use_native_aio,
|
||||
"Use native AIO if supported on this platform.",
|
||||
NULL, NULL, TRUE);
|
||||
|
||||
#ifdef __linux__
|
||||
static MYSQL_SYSVAR_ENUM(linux_aio, srv_linux_aio_method,
|
||||
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
|
||||
"Specifies which Linux AIO implementation should be used."
|
||||
" Possible value are \"auto\" (default) to select io_uring"
|
||||
" and fallback to aio, or explicit \"io_uring\" or \"aio\"",
|
||||
nullptr, nullptr, SRV_LINUX_AIO_AUTO, &innodb_linux_aio_typelib);
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_LIBNUMA
|
||||
static MYSQL_SYSVAR_BOOL(numa_interleave, srv_numa_interleave,
|
||||
PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
|
||||
@ -20059,6 +20087,9 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
|
||||
MYSQL_SYSVAR(tmpdir),
|
||||
MYSQL_SYSVAR(autoinc_lock_mode),
|
||||
MYSQL_SYSVAR(use_native_aio),
|
||||
#ifdef __linux__
|
||||
MYSQL_SYSVAR(linux_aio),
|
||||
#endif
|
||||
#ifdef HAVE_LIBNUMA
|
||||
MYSQL_SYSVAR(numa_interleave),
|
||||
#endif /* HAVE_LIBNUMA */
|
||||
|
@ -77,6 +77,19 @@ enum srv_flush_t
|
||||
#endif
|
||||
};
|
||||
|
||||
/** Possible values of innodb_linux_aio */
|
||||
#ifdef __linux__
|
||||
enum srv_linux_aio_t
|
||||
{
|
||||
/** auto, io_uring first and then aio */
|
||||
SRV_LINUX_AIO_AUTO,
|
||||
/** io_uring */
|
||||
SRV_LINUX_AIO_IO_URING,
|
||||
/** aio (libaio interface) */
|
||||
SRV_LINUX_AIO_LIBAIO
|
||||
};
|
||||
#endif
|
||||
|
||||
/** innodb_flush_method */
|
||||
extern ulong srv_file_flush_method;
|
||||
|
||||
|
@ -178,6 +178,12 @@ OS (provided we compiled Innobase with it in), otherwise we will
|
||||
use simulated aio.
|
||||
Currently we support native aio on windows and linux */
|
||||
extern my_bool srv_use_native_aio;
|
||||
|
||||
#ifdef __linux__
|
||||
/* Selects which Linux native AIO method to use (an srv_linux_aio_t value) */
|
||||
extern ulong srv_linux_aio_method;
|
||||
#endif
|
||||
|
||||
extern my_bool srv_numa_interleave;
|
||||
|
||||
/* Use atomic writes i.e disable doublewrite buffer */
|
||||
|
@ -52,10 +52,6 @@ Created 10/21/1995 Heikki Tuuri
|
||||
|
||||
#include <tpool_structs.h>
|
||||
|
||||
#ifdef LINUX_NATIVE_AIO
|
||||
#include <libaio.h>
|
||||
#endif /* LINUX_NATIVE_AIO */
|
||||
|
||||
#ifdef HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE
|
||||
# include <fcntl.h>
|
||||
# include <linux/falloc.h>
|
||||
@ -3084,132 +3080,6 @@ static void write_io_callback(void *c)
|
||||
write_slots->release(cb);
|
||||
}
|
||||
|
||||
#ifdef LINUX_NATIVE_AIO
|
||||
/** Checks if the system supports native linux aio. On some kernel
|
||||
versions where native aio is supported it won't work on tmpfs. In such
|
||||
cases we can't use native aio.
|
||||
|
||||
@return: true if supported, false otherwise. */
|
||||
static bool is_linux_native_aio_supported()
|
||||
{
|
||||
File fd;
|
||||
io_context_t io_ctx;
|
||||
std::string log_file_path = get_log_file_path();
|
||||
|
||||
memset(&io_ctx, 0, sizeof(io_ctx));
|
||||
if (io_setup(1, &io_ctx)) {
|
||||
|
||||
/* The platform does not support native aio. */
|
||||
|
||||
return(false);
|
||||
|
||||
}
|
||||
else if (!srv_read_only_mode) {
|
||||
|
||||
/* Now check if tmpdir supports native aio ops. */
|
||||
fd = mysql_tmpfile("ib");
|
||||
|
||||
if (fd < 0) {
|
||||
ib::warn()
|
||||
<< "Unable to create temp file to check"
|
||||
" native AIO support.";
|
||||
|
||||
int ret = io_destroy(io_ctx);
|
||||
ut_a(ret != -EINVAL);
|
||||
ut_ad(ret != -EFAULT);
|
||||
|
||||
return(false);
|
||||
}
|
||||
}
|
||||
else {
|
||||
fd = my_open(log_file_path.c_str(), O_RDONLY | O_CLOEXEC,
|
||||
MYF(0));
|
||||
|
||||
if (fd == -1) {
|
||||
|
||||
ib::warn() << "Unable to open \"" << log_file_path
|
||||
<< "\" to check native"
|
||||
<< " AIO read support.";
|
||||
|
||||
int ret = io_destroy(io_ctx);
|
||||
ut_a(ret != EINVAL);
|
||||
ut_ad(ret != EFAULT);
|
||||
|
||||
return(false);
|
||||
}
|
||||
}
|
||||
|
||||
struct io_event io_event;
|
||||
|
||||
memset(&io_event, 0x0, sizeof(io_event));
|
||||
|
||||
byte* ptr = static_cast<byte*>(aligned_malloc(srv_page_size,
|
||||
srv_page_size));
|
||||
|
||||
struct iocb iocb;
|
||||
|
||||
/* Suppress valgrind warning. */
|
||||
memset(ptr, 0, srv_page_size);
|
||||
memset(&iocb, 0x0, sizeof(iocb));
|
||||
|
||||
struct iocb* p_iocb = &iocb;
|
||||
|
||||
if (!srv_read_only_mode) {
|
||||
|
||||
io_prep_pwrite(p_iocb, fd, ptr, srv_page_size, 0);
|
||||
|
||||
}
|
||||
else {
|
||||
ut_a(srv_page_size >= 512);
|
||||
io_prep_pread(p_iocb, fd, ptr, 512, 0);
|
||||
}
|
||||
|
||||
int err = io_submit(io_ctx, 1, &p_iocb);
|
||||
|
||||
if (err >= 1) {
|
||||
/* Now collect the submitted IO request. */
|
||||
err = io_getevents(io_ctx, 1, 1, &io_event, NULL);
|
||||
}
|
||||
|
||||
aligned_free(ptr);
|
||||
my_close(fd, MYF(MY_WME));
|
||||
|
||||
switch (err) {
|
||||
case 1:
|
||||
{
|
||||
int ret = io_destroy(io_ctx);
|
||||
ut_a(ret != -EINVAL);
|
||||
ut_ad(ret != -EFAULT);
|
||||
|
||||
return(true);
|
||||
}
|
||||
|
||||
case -EINVAL:
|
||||
case -ENOSYS:
|
||||
ib::warn()
|
||||
<< "Linux Native AIO not supported. You can either"
|
||||
" move "
|
||||
<< (srv_read_only_mode ? log_file_path : "tmpdir")
|
||||
<< " to a file system that supports native"
|
||||
" AIO or you can set innodb_use_native_aio to"
|
||||
" FALSE to avoid this message.";
|
||||
|
||||
/* fall through. */
|
||||
default:
|
||||
ib::warn()
|
||||
<< "Linux Native AIO check on "
|
||||
<< (srv_read_only_mode ? log_file_path : "tmpdir")
|
||||
<< "returned error[" << -err << "]";
|
||||
}
|
||||
|
||||
int ret = io_destroy(io_ctx);
|
||||
ut_a(ret != -EINVAL);
|
||||
ut_ad(ret != -EFAULT);
|
||||
|
||||
return(false);
|
||||
}
|
||||
#endif
|
||||
|
||||
int os_aio_init() noexcept
|
||||
{
|
||||
int max_write_events= int(srv_n_write_io_threads *
|
||||
@ -3217,41 +3087,56 @@ int os_aio_init() noexcept
|
||||
int max_read_events= int(srv_n_read_io_threads *
|
||||
OS_AIO_N_PENDING_IOS_PER_THREAD);
|
||||
int max_events= max_read_events + max_write_events;
|
||||
int ret;
|
||||
#if LINUX_NATIVE_AIO
|
||||
if (srv_use_native_aio && !is_linux_native_aio_supported())
|
||||
goto disable;
|
||||
#endif
|
||||
int ret= 1;
|
||||
|
||||
ret= srv_thread_pool->configure_aio(srv_use_native_aio, max_events);
|
||||
|
||||
#ifdef LINUX_NATIVE_AIO
|
||||
if (ret)
|
||||
#if defined __linux__ && (defined HAVE_URING || defined LINUX_NATIVE_AIO)
|
||||
if (srv_use_native_aio)
|
||||
{
|
||||
ut_ad(srv_use_native_aio);
|
||||
disable:
|
||||
ib::warn() << "Linux Native AIO disabled.";
|
||||
srv_use_native_aio= false;
|
||||
ret= srv_thread_pool->configure_aio(false, max_events);
|
||||
switch (srv_linux_aio_method) {
|
||||
case SRV_LINUX_AIO_AUTO:
|
||||
case SRV_LINUX_AIO_IO_URING:
|
||||
# ifdef HAVE_URING
|
||||
ret= srv_thread_pool->configure_aio(srv_use_native_aio, max_events,
|
||||
tpool::OS_IO_URING);
|
||||
# endif
|
||||
# ifdef LINUX_NATIVE_AIO
|
||||
# ifdef HAVE_URING
|
||||
if (ret && srv_linux_aio_method == SRV_LINUX_AIO_AUTO)
|
||||
sql_print_warning("InnoDB: io_uring failed: falling back to libaio");
|
||||
else
|
||||
break;
|
||||
/* fallthrough */
|
||||
# endif /* HAVE_URING */
|
||||
case SRV_LINUX_AIO_LIBAIO:
|
||||
ret= srv_thread_pool->configure_aio(srv_use_native_aio, max_events,
|
||||
tpool::OS_AIO);
|
||||
# endif
|
||||
}
|
||||
if (ret)
|
||||
{
|
||||
srv_use_native_aio= false;
|
||||
sql_print_warning("InnoDB: native AIO failed: falling back to"
|
||||
" innodb_use_native_aio=OFF");
|
||||
}
|
||||
else
|
||||
sql_print_information("InnoDB: Using %s", srv_thread_pool
|
||||
->get_aio_implementation());
|
||||
}
|
||||
#endif
|
||||
#endif /* linux */
|
||||
|
||||
#ifdef HAVE_URING
|
||||
if (ret)
|
||||
{
|
||||
ut_ad(srv_use_native_aio);
|
||||
ib::warn()
|
||||
<< "liburing disabled: falling back to innodb_use_native_aio=OFF";
|
||||
srv_use_native_aio= false;
|
||||
ret= srv_thread_pool->configure_aio(false, max_events);
|
||||
}
|
||||
#endif
|
||||
ret= srv_thread_pool->configure_aio(srv_use_native_aio,
|
||||
max_events,
|
||||
tpool::OS_DEFAULT);
|
||||
|
||||
if (!ret)
|
||||
{
|
||||
read_slots= new io_slots(max_read_events, srv_n_read_io_threads);
|
||||
write_slots= new io_slots(max_write_events, srv_n_write_io_threads);
|
||||
}
|
||||
else
|
||||
sql_print_error("InnoDB: Cannot initialize AIO sub-system");
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -3290,8 +3175,8 @@ int os_aio_resize(ulint n_reader_threads, ulint n_writer_threads) noexcept
|
||||
int max_write_events= int(n_writer_threads * OS_AIO_N_PENDING_IOS_PER_THREAD);
|
||||
int events= max_read_events + max_write_events;
|
||||
|
||||
/** Do the Linux AIO dance (this will try to create a new
|
||||
io context with changed max_events ,etc*/
|
||||
/* Do the Linux AIO dance (this will try to create a new
|
||||
io context with changed max_events, etc.) */
|
||||
|
||||
int ret= srv_thread_pool->reconfigure_aio(srv_use_native_aio, events);
|
||||
|
||||
|
@ -137,6 +137,10 @@ OS (provided we compiled Innobase with it in), otherwise we will
|
||||
use simulated aio we build below with threads.
|
||||
Currently we support native aio on windows and linux */
|
||||
my_bool srv_use_native_aio;
|
||||
#ifdef __linux__
|
||||
/* Selects which Linux native AIO method to use (an srv_linux_aio_t value) */
|
||||
ulong srv_linux_aio_method;
|
||||
#endif
|
||||
my_bool srv_numa_interleave;
|
||||
/** copy of innodb_use_atomic_writes; @see innodb_init_params() */
|
||||
my_bool srv_use_atomic_writes;
|
||||
|
@ -1287,22 +1287,9 @@ dberr_t srv_start(bool create_new_db)
|
||||
}
|
||||
|
||||
if (os_aio_init()) {
|
||||
ib::error() << "Cannot initialize AIO sub-system";
|
||||
|
||||
return(srv_init_abort(DB_ERROR));
|
||||
}
|
||||
|
||||
#ifdef LINUX_NATIVE_AIO
|
||||
if (srv_use_native_aio) {
|
||||
ib::info() << "Using Linux native AIO";
|
||||
}
|
||||
#endif
|
||||
#ifdef HAVE_URING
|
||||
if (srv_use_native_aio) {
|
||||
ib::info() << "Using liburing";
|
||||
}
|
||||
#endif
|
||||
|
||||
fil_system.create(srv_file_per_table ? 50000 : 5000);
|
||||
|
||||
if (buf_pool.create()) {
|
||||
|
@ -3,16 +3,18 @@ IF(WIN32)
|
||||
SET(EXTRA_SOURCES tpool_win.cc aio_win.cc)
|
||||
ELSEIF(CMAKE_SYSTEM_NAME STREQUAL "Linux")
|
||||
OPTION(WITH_URING "Require that io_uring be used" OFF)
|
||||
OPTION(WITH_LIBAIO "Require that libaio is used, unless uring is there" OFF)
|
||||
OPTION(WITH_LIBAIO "Require that libaio is used" OFF)
|
||||
IF(WITH_URING)
|
||||
SET(URING_REQUIRED REQUIRED)
|
||||
ELSEIF(WITH_LIBAIO)
|
||||
ENDIF()
|
||||
IF(WITH_LIBAIO)
|
||||
SET(LIBAIO_REQUIRED REQUIRED)
|
||||
ENDIF()
|
||||
SET(EXTRA_SOURCES)
|
||||
FIND_PACKAGE(URING QUIET ${URING_REQUIRED})
|
||||
IF(URING_FOUND)
|
||||
SET(URING_FOUND ${URING_FOUND} PARENT_SCOPE)
|
||||
SET(TPOOL_DEFINES "-DHAVE_URING" PARENT_SCOPE)
|
||||
SET(TPOOL_DEFINES "-DHAVE_URING")
|
||||
ADD_DEFINITIONS(-DHAVE_URING)
|
||||
LINK_LIBRARIES(${URING_LIBRARIES})
|
||||
INCLUDE_DIRECTORIES(${URING_INCLUDE_DIRS})
|
||||
@ -27,16 +29,16 @@ ELSEIF(CMAKE_SYSTEM_NAME STREQUAL "Linux")
|
||||
IF(HAVE_IO_URING_MLOCK_SIZE)
|
||||
SET_SOURCE_FILES_PROPERTIES(aio_liburing.cc PROPERTIES COMPILE_FLAGS "-DHAVE_IO_URING_MLOCK_SIZE")
|
||||
ENDIF()
|
||||
ELSE()
|
||||
FIND_PACKAGE(LIBAIO QUIET ${LIBAIO_REQUIRED})
|
||||
IF(LIBAIO_FOUND)
|
||||
SET(TPOOL_DEFINES "-DLINUX_NATIVE_AIO" PARENT_SCOPE)
|
||||
ADD_DEFINITIONS(-DLINUX_NATIVE_AIO)
|
||||
INCLUDE_DIRECTORIES(${LIBAIO_INCLUDE_DIRS})
|
||||
LINK_LIBRARIES(${LIBAIO_LIBRARIES})
|
||||
SET(EXTRA_SOURCES aio_linux.cc)
|
||||
ENDIF()
|
||||
ENDIF()
|
||||
FIND_PACKAGE(LIBAIO QUIET ${LIBAIO_REQUIRED})
|
||||
IF(LIBAIO_FOUND)
|
||||
SET(TPOOL_DEFINES ${TPOOL_DEFINES} "-DLINUX_NATIVE_AIO")
|
||||
ADD_DEFINITIONS(-DLINUX_NATIVE_AIO)
|
||||
INCLUDE_DIRECTORIES(${LIBAIO_INCLUDE_DIRS})
|
||||
LINK_LIBRARIES(${LIBAIO_LIBRARIES})
|
||||
SET(EXTRA_SOURCES ${EXTRA_SOURCES} aio_linux.cc)
|
||||
ENDIF()
|
||||
SET(TPOOL_DEFINES ${TPOOL_DEFINES} PARENT_SCOPE)
|
||||
ENDIF()
|
||||
|
||||
ADD_LIBRARY(tpool STATIC
|
||||
|
@ -79,8 +79,9 @@ public:
|
||||
|
||||
thread_= std::thread(thread_routine, this);
|
||||
}
|
||||
const char *get_implementation() const override { return "io_uring"; };
|
||||
|
||||
~aio_uring() noexcept
|
||||
~aio_uring() noexcept override
|
||||
{
|
||||
{
|
||||
std::lock_guard<std::mutex> _(mutex_);
|
||||
@ -103,8 +104,8 @@ public:
|
||||
|
||||
int submit_io(tpool::aiocb *cb) final
|
||||
{
|
||||
cb->iov_base= cb->m_buffer;
|
||||
cb->iov_len= cb->m_len;
|
||||
cb->m_iovec.iov_base= cb->m_buffer;
|
||||
cb->m_iovec.iov_len= cb->m_len;
|
||||
|
||||
// The whole operation, from io_uring_get_sqe() until io_uring_submit(),
|
||||
// must be atomic. This is because liburing provides thread-unsafe calls.
|
||||
@ -112,11 +113,9 @@ public:
|
||||
|
||||
io_uring_sqe *sqe= io_uring_get_sqe(&uring_);
|
||||
if (cb->m_opcode == tpool::aio_opcode::AIO_PREAD)
|
||||
io_uring_prep_readv(sqe, cb->m_fh, static_cast<struct iovec *>(cb), 1,
|
||||
cb->m_offset);
|
||||
io_uring_prep_readv(sqe, cb->m_fh, &cb->m_iovec, 1, cb->m_offset);
|
||||
else
|
||||
io_uring_prep_writev(sqe, cb->m_fh, static_cast<struct iovec *>(cb), 1,
|
||||
cb->m_offset);
|
||||
io_uring_prep_writev(sqe, cb->m_fh, &cb->m_iovec, 1, cb->m_offset);
|
||||
io_uring_sqe_set_data(sqe, cb);
|
||||
|
||||
return io_uring_submit(&uring_) == 1 ? 0 : -1;
|
||||
@ -203,11 +202,27 @@ private:
|
||||
namespace tpool
|
||||
{
|
||||
|
||||
aio *create_linux_aio(thread_pool *pool, int max_aio)
|
||||
#ifdef LINUX_NATIVE_AIO
|
||||
aio *create_libaio(thread_pool* tp, int max_io);
|
||||
#endif
|
||||
|
||||
aio *create_linux_aio(thread_pool *pool, int max_aio,
|
||||
aio_implementation implementation)
|
||||
{
|
||||
try {
|
||||
return new aio_uring(pool, max_aio);
|
||||
} catch (std::runtime_error& error) {
|
||||
switch (implementation) {
|
||||
case OS_DEFAULT:
|
||||
case OS_IO_URING:
|
||||
try {
|
||||
return new aio_uring(pool, max_aio);
|
||||
} catch (std::runtime_error&) {
|
||||
return nullptr;
|
||||
}
|
||||
break;
|
||||
#ifdef LINUX_NATIVE_AIO
|
||||
case OS_AIO:
|
||||
return create_libaio(pool, max_aio);
|
||||
#endif
|
||||
default:
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
|
@ -13,14 +13,9 @@ You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111 - 1301 USA*/
|
||||
|
||||
#include "tpool_structs.h"
|
||||
#include "tpool.h"
|
||||
|
||||
# include <thread>
|
||||
# include <atomic>
|
||||
# include <cstdio>
|
||||
# include <libaio.h>
|
||||
# include <sys/syscall.h>
|
||||
#include <thread>
|
||||
#include <sys/syscall.h>
|
||||
|
||||
/**
|
||||
Invoke the io_getevents() system call, without timeout parameter.
|
||||
@ -58,6 +53,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111 - 1301 USA*/
|
||||
will make this version of io_getevents return EINVAL.
|
||||
*/
|
||||
static int my_getevents(io_context_t ctx, long min_nr, long nr, io_event *ev)
|
||||
noexcept
|
||||
{
|
||||
int saved_errno= errno;
|
||||
int ret= syscall(__NR_io_getevents, reinterpret_cast<long>(ctx),
|
||||
@ -81,8 +77,9 @@ static int my_getevents(io_context_t ctx, long min_nr, long nr, io_event *ev)
|
||||
with io_getevents() and forward io completion callback to
|
||||
the worker threadpool.
|
||||
*/
|
||||
namespace tpool
|
||||
namespace
|
||||
{
|
||||
using namespace tpool;
|
||||
|
||||
class aio_linux final : public aio
|
||||
{
|
||||
@ -120,7 +117,7 @@ class aio_linux final : public aio
|
||||
for (int i= 0; i < ret; i++)
|
||||
{
|
||||
const io_event &event= events[i];
|
||||
aiocb *iocb= static_cast<aiocb*>(event.obj);
|
||||
aiocb *iocb= reinterpret_cast<aiocb*>(event.obj);
|
||||
if (static_cast<int>(event.res) < 0)
|
||||
{
|
||||
iocb->m_err= -event.res;
|
||||
@ -160,11 +157,10 @@ public:
|
||||
|
||||
int submit_io(aiocb *cb) override
|
||||
{
|
||||
io_prep_pread(static_cast<iocb*>(cb), cb->m_fh, cb->m_buffer, cb->m_len,
|
||||
cb->m_offset);
|
||||
io_prep_pread(&cb->m_iocb, cb->m_fh, cb->m_buffer, cb->m_len, cb->m_offset);
|
||||
if (cb->m_opcode != aio_opcode::AIO_PREAD)
|
||||
cb->aio_lio_opcode= IO_CMD_PWRITE;
|
||||
iocb *icb= static_cast<iocb*>(cb);
|
||||
cb->m_iocb.aio_lio_opcode= IO_CMD_PWRITE;
|
||||
iocb *icb= &cb->m_iocb;
|
||||
int ret= io_submit(m_io_ctx, 1, &icb);
|
||||
if (ret == 1)
|
||||
return 0;
|
||||
@ -174,11 +170,20 @@ public:
|
||||
|
||||
int bind(native_file_handle&) override { return 0; }
|
||||
int unbind(const native_file_handle&) override { return 0; }
|
||||
const char *get_implementation() const override { return "Linux native AIO"; };
|
||||
};
|
||||
|
||||
std::atomic<bool> aio_linux::shutdown_in_progress;
|
||||
}
|
||||
|
||||
aio *create_linux_aio(thread_pool *pool, int max_io)
|
||||
namespace tpool
|
||||
{
|
||||
|
||||
#ifdef HAVE_URING
|
||||
aio *create_libaio(thread_pool *pool, int max_io)
|
||||
#else
|
||||
aio *create_linux_aio(thread_pool *pool, int max_io, aio_implementation)
|
||||
#endif
|
||||
{
|
||||
io_context_t ctx;
|
||||
memset(&ctx, 0, sizeof ctx);
|
||||
@ -187,6 +192,7 @@ aio *create_linux_aio(thread_pool *pool, int max_io)
|
||||
fprintf(stderr, "io_setup(%d) returned %d\n", max_io, ret);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
return new aio_linux(ctx, pool);
|
||||
}
|
||||
}
|
||||
|
@ -154,6 +154,7 @@ public:
|
||||
|
||||
int bind(native_file_handle &fd) override { return 0; }
|
||||
int unbind(const native_file_handle &fd) override { return 0; }
|
||||
const char *get_implementation() const override { return "simulated"; }
|
||||
};
|
||||
|
||||
aio *create_simulated_aio(thread_pool *tp)
|
||||
|
@ -131,6 +131,7 @@ public:
|
||||
: GetLastError();
|
||||
}
|
||||
int unbind(const native_file_handle& fd) override { return 0; }
|
||||
const char *get_implementation() const override { return "completion ports"; }
|
||||
};
|
||||
|
||||
aio* create_win_aio(thread_pool* pool, int max_io)
|
||||
|
@ -128,12 +128,21 @@ constexpr size_t MAX_AIO_USERDATA_LEN= 4 * sizeof(void*);
|
||||
struct aiocb
|
||||
#ifdef _WIN32
|
||||
:OVERLAPPED
|
||||
#elif defined LINUX_NATIVE_AIO
|
||||
:iocb
|
||||
#elif defined HAVE_URING
|
||||
:iovec
|
||||
#endif
|
||||
{
|
||||
#if defined LINUX_NATIVE_AIO || defined HAVE_URING
|
||||
union {
|
||||
# ifdef LINUX_NATIVE_AIO
|
||||
/** The context between io_submit() and io_getevents();
|
||||
must be the first data member! */
|
||||
iocb m_iocb;
|
||||
# endif
|
||||
# ifdef HAVE_URING
|
||||
/** The context between io_uring_submit() and io_uring_wait_cqe() */
|
||||
iovec m_iovec;
|
||||
# endif
|
||||
};
|
||||
#endif
|
||||
native_file_handle m_fh;
|
||||
aio_opcode m_opcode;
|
||||
unsigned long long m_offset;
|
||||
@ -173,6 +182,7 @@ public:
|
||||
virtual int bind(native_file_handle &fd)= 0;
|
||||
/** "Unbind" file from AIO handler (used on Windows only) */
|
||||
virtual int unbind(const native_file_handle &fd)= 0;
|
||||
virtual const char *get_implementation() const=0;
|
||||
virtual ~aio(){};
|
||||
protected:
|
||||
static void synchronous(aiocb *cb);
|
||||
@ -202,12 +212,21 @@ class thread_pool;
|
||||
|
||||
extern aio *create_simulated_aio(thread_pool *tp);
|
||||
|
||||
enum aio_implementation
|
||||
{
|
||||
OS_DEFAULT
|
||||
#ifdef __linux__
|
||||
, OS_IO_URING
|
||||
, OS_AIO
|
||||
#endif
|
||||
};
|
||||
|
||||
class thread_pool
|
||||
{
|
||||
protected:
|
||||
/* AIO handler */
|
||||
std::unique_ptr<aio> m_aio;
|
||||
virtual aio *create_native_aio(int max_io)= 0;
|
||||
std::unique_ptr<aio> m_aio{};
|
||||
virtual aio *create_native_aio(int max_io, aio_implementation)= 0;
|
||||
|
||||
public:
|
||||
/**
|
||||
@ -217,10 +236,7 @@ public:
|
||||
void (*m_worker_init_callback)(void)= [] {};
|
||||
void (*m_worker_destroy_callback)(void)= [] {};
|
||||
|
||||
thread_pool()
|
||||
: m_aio()
|
||||
{
|
||||
}
|
||||
thread_pool()= default;
|
||||
virtual void submit_task(task *t)= 0;
|
||||
virtual timer* create_timer(callback_func func, void *data=nullptr) = 0;
|
||||
void set_thread_callbacks(void (*init)(), void (*destroy)())
|
||||
@ -230,10 +246,10 @@ public:
|
||||
m_worker_init_callback= init;
|
||||
m_worker_destroy_callback= destroy;
|
||||
}
|
||||
int configure_aio(bool use_native_aio, int max_io)
|
||||
int configure_aio(bool use_native_aio, int max_io, aio_implementation impl)
|
||||
{
|
||||
if (use_native_aio)
|
||||
m_aio.reset(create_native_aio(max_io));
|
||||
m_aio.reset(create_native_aio(max_io, impl));
|
||||
else
|
||||
m_aio.reset(create_simulated_aio(this));
|
||||
return !m_aio ? -1 : 0;
|
||||
@ -244,7 +260,12 @@ public:
|
||||
assert(m_aio);
|
||||
if (use_native_aio)
|
||||
{
|
||||
auto new_aio = create_native_aio(max_io);
|
||||
const aio_implementation impl=
|
||||
#ifdef LINUX_NATIVE_AIO
|
||||
!strcmp(get_aio_implementation(), "Linux native AIO") ? OS_AIO :
|
||||
#endif
|
||||
OS_DEFAULT;
|
||||
auto new_aio= create_native_aio(max_io, impl);
|
||||
if (!new_aio)
|
||||
return -1;
|
||||
m_aio.reset(new_aio);
|
||||
@ -256,6 +277,10 @@ public:
|
||||
{
|
||||
m_aio.reset();
|
||||
}
|
||||
const char *get_aio_implementation() const
|
||||
{
|
||||
return m_aio->get_implementation();
|
||||
}
|
||||
|
||||
/**
|
||||
Tweaks how fast worker threads are created, or how often they are signaled.
|
||||
|
@ -39,14 +39,14 @@ namespace tpool
|
||||
{
|
||||
|
||||
#ifdef __linux__
|
||||
#if defined(HAVE_URING) || defined(LINUX_NATIVE_AIO)
|
||||
extern aio* create_linux_aio(thread_pool* tp, int max_io);
|
||||
#else
|
||||
aio *create_linux_aio(thread_pool *, int) { return nullptr; };
|
||||
#endif
|
||||
#endif
|
||||
#ifdef _WIN32
|
||||
extern aio* create_win_aio(thread_pool* tp, int max_io);
|
||||
# if defined(HAVE_URING) || defined(LINUX_NATIVE_AIO)
|
||||
aio *create_linux_aio(thread_pool* tp, int max_io, aio_implementation);
|
||||
# else
|
||||
static aio *create_linux_aio(thread_pool *, int, aio_implementation)
|
||||
{ return nullptr; }
|
||||
# endif
|
||||
#elif defined _WIN32
|
||||
aio *create_win_aio(thread_pool* tp, int max_io);
|
||||
#endif
|
||||
|
||||
static const std::chrono::milliseconds LONG_TASK_DURATION = std::chrono::milliseconds(500);
|
||||
@ -299,16 +299,15 @@ public:
|
||||
void wait_begin() override;
|
||||
void wait_end() override;
|
||||
void submit_task(task *task) override;
|
||||
aio *create_native_aio(int max_io) override
|
||||
{
|
||||
#ifdef _WIN32
|
||||
return create_win_aio(this, max_io);
|
||||
#elif defined(__linux__)
|
||||
return create_linux_aio(this,max_io);
|
||||
aio *create_native_aio(int max_io, aio_implementation) override
|
||||
{ return create_win_aio(this, max_io); }
|
||||
#elif defined __linux__
|
||||
aio *create_native_aio(int max_io, aio_implementation impl) override
|
||||
{ return create_linux_aio(this, max_io, impl); }
|
||||
#else
|
||||
return nullptr;
|
||||
aio *create_native_aio(int, aio_implementation) override { return nullptr; }
|
||||
#endif
|
||||
}
|
||||
|
||||
class timer_generic : public thr_timer_t, public timer
|
||||
{
|
||||
|
@ -206,6 +206,11 @@ class thread_pool_win : public thread_pool
|
||||
CloseThreadpoolIo(fd.m_ptp_io);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
Expose implementation.
|
||||
*/
|
||||
const char *get_implementation() const override { return "ThreadPool"; }
|
||||
};
|
||||
|
||||
PTP_POOL m_ptp_pool;
|
||||
@ -268,7 +273,7 @@ public:
|
||||
abort();
|
||||
}
|
||||
|
||||
aio *create_native_aio(int max_io) override
|
||||
aio *create_native_aio(int max_io, aio_implementation) override
|
||||
{
|
||||
return new native_aio(*this, max_io);
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user