MDEV-36234: Add innodb_linux_aio

This controls which Linux AIO implementation to use for
innodb_use_native_aio=ON.

innodb_linux_aio=auto is equivalent to innodb_linux_aio=io_uring when
it is available, and falls back to innodb_linux_aio=aio when not.

Debian packaging no longer treats aio and uring as mutually exclusive, so
for those older Debian or Ubuntu releases, it's a remove_uring directive
(instead of replace_uring_with_aio).
For more recent releases, add mandatory liburing for consistent packaging.

WITH_LIBAIO is now an independent option from WITH_URING.

is_linux_native_aio_supported(): Remove. This had originally been added in
mysql/mysql-server@0da310b69d in 2012
to fix an issue where io_submit() on CentOS 5.5 would return EINVAL
for a /tmp/#sql*.ibd file associated with CREATE TEMPORARY TABLE.
But, starting with commit 2e814d4702d71a04388386a9f591d14a35980bfe InnoDB
temporary tables will be written to innodb_temp_data_file_path.
The 2012 commit said that the error could occur on "old kernels".
Any GNU/Linux distribution that we currently support should be based
on a newer Linux kernel; for example, Red Hat Enterprise Linux 7
was released in 2014.

This is joint work with Daniel Black and Vladislav Vaintroub.
This commit is contained in:
Marko Mäkelä 2025-05-26 13:53:41 +03:00
parent db188083c3
commit 585531d6c0
22 changed files with 286 additions and 259 deletions

View File

@ -64,11 +64,10 @@ add_lsb_base_depends()
sed -e 's#lsof #lsb-base (>= 3.0-10),\n lsof #' -i debian/control sed -e 's#lsof #lsb-base (>= 3.0-10),\n lsof #' -i debian/control
} }
replace_uring_with_aio() remove_uring()
{ {
sed 's/liburing-dev/libaio-dev/g' -i debian/control sed -e '/liburing-dev/d' -i debian/control
sed -e '/-DIGNORE_AIO_CHECK=ON/d' \ sed -e '/-DWITH_URING=ON/d' -i debian/rules
-e '/-DWITH_URING=ON/d' -i debian/rules
} }
disable_libfmt() disable_libfmt()
@ -116,7 +115,7 @@ in
# Debian # Debian
"buster") "buster")
disable_libfmt disable_libfmt
replace_uring_with_aio remove_uring
;& ;&
"bullseye") "bullseye")
add_lsb_base_depends add_lsb_base_depends
@ -127,7 +126,7 @@ in
# so no removal is necessary. # so no removal is necessary.
if [[ ! "$architecture" =~ amd64|arm64|armel|armhf|i386|mips64el|mipsel|ppc64el|s390x ]] if [[ ! "$architecture" =~ amd64|arm64|armel|armhf|i386|mips64el|mipsel|ppc64el|s390x ]]
then then
replace_uring_with_aio remove_uring
fi fi
;& ;&
"trixie"|"sid") "trixie"|"sid")
@ -136,8 +135,8 @@ in
;; ;;
# Ubuntu # Ubuntu
"focal") "focal")
replace_uring_with_aio
disable_libfmt disable_libfmt
remove_uring
;& ;&
"jammy"|"kinetic") "jammy"|"kinetic")
add_lsb_base_depends add_lsb_base_depends

5
debian/rules vendored
View File

@ -87,9 +87,6 @@ endif
# quality standards in Debian. Also building it requires an extra 4 GB of disk # quality standards in Debian. Also building it requires an extra 4 GB of disk
# space which makes native Debian builds fail as the total disk space needed # space which makes native Debian builds fail as the total disk space needed
# for MariaDB becomes over 10 GB. Only build CS via autobake-deb.sh. # for MariaDB becomes over 10 GB. Only build CS via autobake-deb.sh.
#
# Note: Don't use '-DWITH_URING=ON' as some Buildbot builders are missing it
# and would fail permanently.
PATH=$${MYSQL_BUILD_PATH:-"/usr/lib/ccache:/usr/local/bin:/usr/bin:/bin"} \ PATH=$${MYSQL_BUILD_PATH:-"/usr/lib/ccache:/usr/local/bin:/usr/bin:/bin"} \
dh_auto_configure --builddirectory=$(BUILDDIR) -- \ dh_auto_configure --builddirectory=$(BUILDDIR) -- \
-DCMAKE_BUILD_TYPE=RelWithDebInfo \ -DCMAKE_BUILD_TYPE=RelWithDebInfo \
@ -103,6 +100,8 @@ endif
-DPLUGIN_AWS_KEY_MANAGEMENT=NO \ -DPLUGIN_AWS_KEY_MANAGEMENT=NO \
-DPLUGIN_COLUMNSTORE=NO \ -DPLUGIN_COLUMNSTORE=NO \
-DIGNORE_AIO_CHECK=ON \ -DIGNORE_AIO_CHECK=ON \
-DWITH_URING=ON \
-DWITH_LIBAIO=ON \
-DDEB=$(DEB_VENDOR) -DDEB=$(DEB_VENDOR)
# This is needed, otherwise 'make test' will run before binaries have been built # This is needed, otherwise 'make test' will run before binaries have been built

View File

@ -378,6 +378,10 @@ extern const char *innodb_checksum_algorithm_names[];
extern TYPELIB innodb_checksum_algorithm_typelib; extern TYPELIB innodb_checksum_algorithm_typelib;
extern const char *innodb_flush_method_names[]; extern const char *innodb_flush_method_names[];
extern TYPELIB innodb_flush_method_typelib; extern TYPELIB innodb_flush_method_typelib;
#ifdef __linux__
extern const char *innodb_linux_aio_names[];
extern TYPELIB innodb_linux_aio_typelib;
#endif
static const char *binlog_info_values[] = {"off", "lockless", "on", "auto", static const char *binlog_info_values[] = {"off", "lockless", "on", "auto",
NullS}; NullS};
@ -1334,6 +1338,9 @@ enum options_xtrabackup
OPT_INNODB_READ_IO_THREADS, OPT_INNODB_READ_IO_THREADS,
OPT_INNODB_WRITE_IO_THREADS, OPT_INNODB_WRITE_IO_THREADS,
OPT_INNODB_USE_NATIVE_AIO, OPT_INNODB_USE_NATIVE_AIO,
#ifdef __linux__
OPT_INNODB_LINUX_AIO,
#endif
OPT_INNODB_PAGE_SIZE, OPT_INNODB_PAGE_SIZE,
OPT_INNODB_BUFFER_POOL_FILENAME, OPT_INNODB_BUFFER_POOL_FILENAME,
OPT_INNODB_LOCK_WAIT_TIMEOUT, OPT_INNODB_LOCK_WAIT_TIMEOUT,
@ -1934,6 +1941,14 @@ struct my_option xb_server_options[] =
(G_PTR*) &srv_use_native_aio, (G_PTR*) &srv_use_native_aio,
(G_PTR*) &srv_use_native_aio, 0, GET_BOOL, NO_ARG, (G_PTR*) &srv_use_native_aio, 0, GET_BOOL, NO_ARG,
TRUE, 0, 0, 0, 0, 0}, TRUE, 0, 0, 0, 0, 0},
#ifdef __linux__
{"innodb_linux_aio", OPT_INNODB_LINUX_AIO,
"Which linux AIO implementation to use, auto (io_uring, failing to aio) or explicit",
(G_PTR*) &srv_linux_aio_method,
(G_PTR*) &srv_linux_aio_method,
&innodb_linux_aio_typelib, GET_ENUM, REQUIRED_ARG,
SRV_LINUX_AIO_AUTO, 0, 0, 0, 0, 0},
#endif
{"innodb_page_size", OPT_INNODB_PAGE_SIZE, {"innodb_page_size", OPT_INNODB_PAGE_SIZE,
"The universal page size of the database.", "The universal page size of the database.",
(G_PTR*) &innobase_page_size, (G_PTR*) &innobase_page_size, 0, (G_PTR*) &innobase_page_size, (G_PTR*) &innobase_page_size, 0,
@ -2529,19 +2544,8 @@ static bool innodb_init_param()
ut_ad(DATA_MYSQL_BINARY_CHARSET_COLL == my_charset_bin.number); ut_ad(DATA_MYSQL_BINARY_CHARSET_COLL == my_charset_bin.number);
#ifdef _WIN32 #if defined(_WIN32) || defined(LINUX_NATIVE_AIO) || defined(HAVE_URING)
srv_use_native_aio = TRUE; srv_use_native_aio = TRUE;
#elif defined(LINUX_NATIVE_AIO)
if (srv_use_native_aio) {
msg("InnoDB: Using Linux native AIO");
}
#elif defined(HAVE_URING)
if (srv_use_native_aio) {
msg("InnoDB: Using liburing");
}
#else #else
/* Currently native AIO is supported only on windows and linux /* Currently native AIO is supported only on windows and linux
and that also when the support is compiled in. In all other and that also when the support is compiled in. In all other
@ -5473,7 +5477,6 @@ fail:
xb_fil_io_init(); xb_fil_io_init();
if (os_aio_init()) { if (os_aio_init()) {
msg("Error: cannot initialize AIO subsystem");
goto fail; goto fail;
} }

View File

@ -4548,7 +4548,7 @@ sub extract_warning_lines ($$) {
qr|InnoDB: io_setup\(\) attempt|, qr|InnoDB: io_setup\(\) attempt|,
qr|InnoDB: io_setup\(\) failed with EAGAIN|, qr|InnoDB: io_setup\(\) failed with EAGAIN|,
qr|io_uring_queue_init\(\) failed with|, qr|io_uring_queue_init\(\) failed with|,
qr|InnoDB: liburing disabled|, qr|InnoDB: io_uring failed: falling back to libaio|,
qr/InnoDB: Failed to set O_DIRECT on file/, qr/InnoDB: Failed to set O_DIRECT on file/,
qr|setrlimit could not change the size of core files to 'infinity';|, qr|setrlimit could not change the size of core files to 'infinity';|,
qr|failed to retrieve the MAC address|, qr|failed to retrieve the MAC address|,

View File

@ -0,0 +1,21 @@
select @@global.innodb_linux_aio;
@@global.innodb_linux_aio
auto
select @@session.innodb_linux_aio;
ERROR HY000: Variable 'innodb_linux_aio' is a GLOBAL variable
show global variables like 'innodb_linux_aio';
Variable_name Value
innodb_linux_aio auto
show session variables like 'innodb_linux_aio';
Variable_name Value
innodb_linux_aio auto
select * from information_schema.global_variables where variable_name='innodb_linux_aio';
VARIABLE_NAME VARIABLE_VALUE
INNODB_LINUX_AIO auto
select * from information_schema.session_variables where variable_name='innodb_linux_aio';
VARIABLE_NAME VARIABLE_VALUE
INNODB_LINUX_AIO auto
set global innodb_linux_aio='auto';
ERROR HY000: Variable 'innodb_linux_aio' is a read only variable
set session innodb_linux_aio='aio';
ERROR HY000: Variable 'innodb_linux_aio' is a read only variable

View File

@ -5,6 +5,7 @@ variable_name not in (
'innodb_evict_tables_on_commit_debug', # one may want to override this 'innodb_evict_tables_on_commit_debug', # one may want to override this
'innodb_use_native_aio', # default value depends on OS 'innodb_use_native_aio', # default value depends on OS
'innodb_log_file_buffering', # only available on Linux and Windows 'innodb_log_file_buffering', # only available on Linux and Windows
'innodb_linux_aio', # existence depends on OS
'innodb_buffer_pool_load_pages_abort') # debug build only, and is only for testing 'innodb_buffer_pool_load_pages_abort') # debug build only, and is only for testing
order by variable_name; order by variable_name;
VARIABLE_NAME INNODB_ADAPTIVE_FLUSHING VARIABLE_NAME INNODB_ADAPTIVE_FLUSHING

View File

@ -0,0 +1,23 @@
--source include/have_innodb.inc
--source include/linux.inc
# enum readonly
#
# show values;
#
select @@global.innodb_linux_aio;
--error ER_INCORRECT_GLOBAL_LOCAL_VAR
select @@session.innodb_linux_aio;
show global variables like 'innodb_linux_aio';
show session variables like 'innodb_linux_aio';
select * from information_schema.global_variables where variable_name='innodb_linux_aio';
select * from information_schema.session_variables where variable_name='innodb_linux_aio';
#
# show that it's read-only
#
--error ER_INCORRECT_GLOBAL_LOCAL_VAR
set global innodb_linux_aio='auto';
--error ER_INCORRECT_GLOBAL_LOCAL_VAR
set session innodb_linux_aio='aio';

View File

@ -16,5 +16,6 @@ select VARIABLE_NAME, SESSION_VALUE, DEFAULT_VALUE, VARIABLE_SCOPE, VARIABLE_TYP
'innodb_evict_tables_on_commit_debug', # one may want to override this 'innodb_evict_tables_on_commit_debug', # one may want to override this
'innodb_use_native_aio', # default value depends on OS 'innodb_use_native_aio', # default value depends on OS
'innodb_log_file_buffering', # only available on Linux and Windows 'innodb_log_file_buffering', # only available on Linux and Windows
'innodb_linux_aio', # existence depends on OS
'innodb_buffer_pool_load_pages_abort') # debug build only, and is only for testing 'innodb_buffer_pool_load_pages_abort') # debug build only, and is only for testing
order by variable_name; order by variable_name;

View File

@ -315,6 +315,25 @@ static TYPELIB innodb_stats_method_typelib = {
NULL NULL
}; };
/** Possible values for system variable "innodb_linux_aio" */
#ifdef __linux__
const char* innodb_linux_aio_names[] = {
"auto", /* SRV_LINUX_AIO_AUTO */
"io_uring", /* SRV_LINUX_AIO_IO_URING */
"aio", /* SRV_LINUX_AIO_LIBAIO */
NullS
};
/** Used to define an enumerate type of the system variable
innodb_linux_aio. Used by mariadb-backup too. */
TYPELIB innodb_linux_aio_typelib = {
array_elements(innodb_linux_aio_names) - 1,
"innodb_linux_aio_typelib",
innodb_linux_aio_names,
NULL
};
#endif
/** Possible values of the parameter innodb_checksum_algorithm */ /** Possible values of the parameter innodb_checksum_algorithm */
const char* innodb_checksum_algorithm_names[] = { const char* innodb_checksum_algorithm_names[] = {
"crc32", "crc32",
@ -19664,6 +19683,15 @@ static MYSQL_SYSVAR_BOOL(use_native_aio, srv_use_native_aio,
"Use native AIO if supported on this platform.", "Use native AIO if supported on this platform.",
NULL, NULL, TRUE); NULL, NULL, TRUE);
#ifdef __linux__
static MYSQL_SYSVAR_ENUM(linux_aio, srv_linux_aio_method,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"Specifies which Linux AIO implementation should be used."
" Possible value are \"auto\" (default) to select io_uring"
" and fallback to aio, or explicit \"io_uring\" or \"aio\"",
nullptr, nullptr, SRV_LINUX_AIO_AUTO, &innodb_linux_aio_typelib);
#endif
#ifdef HAVE_LIBNUMA #ifdef HAVE_LIBNUMA
static MYSQL_SYSVAR_BOOL(numa_interleave, srv_numa_interleave, static MYSQL_SYSVAR_BOOL(numa_interleave, srv_numa_interleave,
PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY, PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
@ -20059,6 +20087,9 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(tmpdir), MYSQL_SYSVAR(tmpdir),
MYSQL_SYSVAR(autoinc_lock_mode), MYSQL_SYSVAR(autoinc_lock_mode),
MYSQL_SYSVAR(use_native_aio), MYSQL_SYSVAR(use_native_aio),
#ifdef __linux__
MYSQL_SYSVAR(linux_aio),
#endif
#ifdef HAVE_LIBNUMA #ifdef HAVE_LIBNUMA
MYSQL_SYSVAR(numa_interleave), MYSQL_SYSVAR(numa_interleave),
#endif /* HAVE_LIBNUMA */ #endif /* HAVE_LIBNUMA */

View File

@ -77,6 +77,19 @@ enum srv_flush_t
#endif #endif
}; };
/** Possible values of innodb_linux_aio */
#ifdef __linux__
enum srv_linux_aio_t
{
/** auto, io_uring first and then aio */
SRV_LINUX_AIO_AUTO,
/** io_uring */
SRV_LINUX_AIO_IO_URING,
/** aio (libaio interface) */
SRV_LINUX_AIO_LIBAIO
};
#endif
/** innodb_flush_method */ /** innodb_flush_method */
extern ulong srv_file_flush_method; extern ulong srv_file_flush_method;

View File

@ -178,6 +178,12 @@ OS (provided we compiled Innobase with it in), otherwise we will
use simulated aio. use simulated aio.
Currently we support native aio on windows and linux */ Currently we support native aio on windows and linux */
extern my_bool srv_use_native_aio; extern my_bool srv_use_native_aio;
#ifdef __linux__
/* This enum is defined which linux native io method to use */
extern ulong srv_linux_aio_method;
#endif
extern my_bool srv_numa_interleave; extern my_bool srv_numa_interleave;
/* Use atomic writes i.e disable doublewrite buffer */ /* Use atomic writes i.e disable doublewrite buffer */

View File

@ -52,10 +52,6 @@ Created 10/21/1995 Heikki Tuuri
#include <tpool_structs.h> #include <tpool_structs.h>
#ifdef LINUX_NATIVE_AIO
#include <libaio.h>
#endif /* LINUX_NATIVE_AIO */
#ifdef HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE #ifdef HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE
# include <fcntl.h> # include <fcntl.h>
# include <linux/falloc.h> # include <linux/falloc.h>
@ -3084,132 +3080,6 @@ static void write_io_callback(void *c)
write_slots->release(cb); write_slots->release(cb);
} }
#ifdef LINUX_NATIVE_AIO
/** Checks if the system supports native linux aio. On some kernel
versions where native aio is supported it won't work on tmpfs. In such
cases we can't use native aio.
@return: true if supported, false otherwise. */
static bool is_linux_native_aio_supported()
{
File fd;
io_context_t io_ctx;
std::string log_file_path = get_log_file_path();
memset(&io_ctx, 0, sizeof(io_ctx));
if (io_setup(1, &io_ctx)) {
/* The platform does not support native aio. */
return(false);
}
else if (!srv_read_only_mode) {
/* Now check if tmpdir supports native aio ops. */
fd = mysql_tmpfile("ib");
if (fd < 0) {
ib::warn()
<< "Unable to create temp file to check"
" native AIO support.";
int ret = io_destroy(io_ctx);
ut_a(ret != -EINVAL);
ut_ad(ret != -EFAULT);
return(false);
}
}
else {
fd = my_open(log_file_path.c_str(), O_RDONLY | O_CLOEXEC,
MYF(0));
if (fd == -1) {
ib::warn() << "Unable to open \"" << log_file_path
<< "\" to check native"
<< " AIO read support.";
int ret = io_destroy(io_ctx);
ut_a(ret != EINVAL);
ut_ad(ret != EFAULT);
return(false);
}
}
struct io_event io_event;
memset(&io_event, 0x0, sizeof(io_event));
byte* ptr = static_cast<byte*>(aligned_malloc(srv_page_size,
srv_page_size));
struct iocb iocb;
/* Suppress valgrind warning. */
memset(ptr, 0, srv_page_size);
memset(&iocb, 0x0, sizeof(iocb));
struct iocb* p_iocb = &iocb;
if (!srv_read_only_mode) {
io_prep_pwrite(p_iocb, fd, ptr, srv_page_size, 0);
}
else {
ut_a(srv_page_size >= 512);
io_prep_pread(p_iocb, fd, ptr, 512, 0);
}
int err = io_submit(io_ctx, 1, &p_iocb);
if (err >= 1) {
/* Now collect the submitted IO request. */
err = io_getevents(io_ctx, 1, 1, &io_event, NULL);
}
aligned_free(ptr);
my_close(fd, MYF(MY_WME));
switch (err) {
case 1:
{
int ret = io_destroy(io_ctx);
ut_a(ret != -EINVAL);
ut_ad(ret != -EFAULT);
return(true);
}
case -EINVAL:
case -ENOSYS:
ib::warn()
<< "Linux Native AIO not supported. You can either"
" move "
<< (srv_read_only_mode ? log_file_path : "tmpdir")
<< " to a file system that supports native"
" AIO or you can set innodb_use_native_aio to"
" FALSE to avoid this message.";
/* fall through. */
default:
ib::warn()
<< "Linux Native AIO check on "
<< (srv_read_only_mode ? log_file_path : "tmpdir")
<< "returned error[" << -err << "]";
}
int ret = io_destroy(io_ctx);
ut_a(ret != -EINVAL);
ut_ad(ret != -EFAULT);
return(false);
}
#endif
int os_aio_init() noexcept int os_aio_init() noexcept
{ {
int max_write_events= int(srv_n_write_io_threads * int max_write_events= int(srv_n_write_io_threads *
@ -3217,41 +3087,56 @@ int os_aio_init() noexcept
int max_read_events= int(srv_n_read_io_threads * int max_read_events= int(srv_n_read_io_threads *
OS_AIO_N_PENDING_IOS_PER_THREAD); OS_AIO_N_PENDING_IOS_PER_THREAD);
int max_events= max_read_events + max_write_events; int max_events= max_read_events + max_write_events;
int ret; int ret= 1;
#if LINUX_NATIVE_AIO
if (srv_use_native_aio && !is_linux_native_aio_supported())
goto disable;
#endif
ret= srv_thread_pool->configure_aio(srv_use_native_aio, max_events); #if defined __linux__ && (defined HAVE_URING || defined LINUX_NATIVE_AIO)
if (srv_use_native_aio)
#ifdef LINUX_NATIVE_AIO
if (ret)
{ {
ut_ad(srv_use_native_aio); switch (srv_linux_aio_method) {
disable: case SRV_LINUX_AIO_AUTO:
ib::warn() << "Linux Native AIO disabled."; case SRV_LINUX_AIO_IO_URING:
srv_use_native_aio= false;
ret= srv_thread_pool->configure_aio(false, max_events);
}
#endif
# ifdef HAVE_URING # ifdef HAVE_URING
ret= srv_thread_pool->configure_aio(srv_use_native_aio, max_events,
tpool::OS_IO_URING);
# endif
# ifdef LINUX_NATIVE_AIO
# ifdef HAVE_URING
if (ret && srv_linux_aio_method == SRV_LINUX_AIO_AUTO)
sql_print_warning("InnoDB: io_uring failed: falling back to libaio");
else
break;
/* fallthough */
# endif /* HAVE_URING */
case SRV_LINUX_AIO_LIBAIO:
ret= srv_thread_pool->configure_aio(srv_use_native_aio, max_events,
tpool::OS_AIO);
# endif
}
if (ret) if (ret)
{ {
ut_ad(srv_use_native_aio);
ib::warn()
<< "liburing disabled: falling back to innodb_use_native_aio=OFF";
srv_use_native_aio= false; srv_use_native_aio= false;
ret= srv_thread_pool->configure_aio(false, max_events); sql_print_warning("InnoDB: native AIO failed: falling back to"
" innodb_use_native_aio=OFF");
} }
#endif else
sql_print_information("InnoDB: Using %s", srv_thread_pool
->get_aio_implementation());
}
#endif /* linux */
if (ret)
ret= srv_thread_pool->configure_aio(srv_use_native_aio,
max_events,
tpool::OS_DEFAULT);
if (!ret) if (!ret)
{ {
read_slots= new io_slots(max_read_events, srv_n_read_io_threads); read_slots= new io_slots(max_read_events, srv_n_read_io_threads);
write_slots= new io_slots(max_write_events, srv_n_write_io_threads); write_slots= new io_slots(max_write_events, srv_n_write_io_threads);
} }
else
sql_print_error("InnoDB: Cannot initialize AIO sub-system");
return ret; return ret;
} }
@ -3290,8 +3175,8 @@ int os_aio_resize(ulint n_reader_threads, ulint n_writer_threads) noexcept
int max_write_events= int(n_writer_threads * OS_AIO_N_PENDING_IOS_PER_THREAD); int max_write_events= int(n_writer_threads * OS_AIO_N_PENDING_IOS_PER_THREAD);
int events= max_read_events + max_write_events; int events= max_read_events + max_write_events;
/** Do the Linux AIO dance (this will try to create a new /* Do the Linux AIO dance (this will try to create a new
io context with changed max_events ,etc*/ io context with changed max_events, etc.) */
int ret= srv_thread_pool->reconfigure_aio(srv_use_native_aio, events); int ret= srv_thread_pool->reconfigure_aio(srv_use_native_aio, events);

View File

@ -137,6 +137,10 @@ OS (provided we compiled Innobase with it in), otherwise we will
use simulated aio we build below with threads. use simulated aio we build below with threads.
Currently we support native aio on windows and linux */ Currently we support native aio on windows and linux */
my_bool srv_use_native_aio; my_bool srv_use_native_aio;
#ifdef __linux__
/* This enum is defined which linux native io method to use */
ulong srv_linux_aio_method;
#endif
my_bool srv_numa_interleave; my_bool srv_numa_interleave;
/** copy of innodb_use_atomic_writes; @see innodb_init_params() */ /** copy of innodb_use_atomic_writes; @see innodb_init_params() */
my_bool srv_use_atomic_writes; my_bool srv_use_atomic_writes;

View File

@ -1287,22 +1287,9 @@ dberr_t srv_start(bool create_new_db)
} }
if (os_aio_init()) { if (os_aio_init()) {
ib::error() << "Cannot initialize AIO sub-system";
return(srv_init_abort(DB_ERROR)); return(srv_init_abort(DB_ERROR));
} }
#ifdef LINUX_NATIVE_AIO
if (srv_use_native_aio) {
ib::info() << "Using Linux native AIO";
}
#endif
#ifdef HAVE_URING
if (srv_use_native_aio) {
ib::info() << "Using liburing";
}
#endif
fil_system.create(srv_file_per_table ? 50000 : 5000); fil_system.create(srv_file_per_table ? 50000 : 5000);
if (buf_pool.create()) { if (buf_pool.create()) {

View File

@ -3,16 +3,18 @@ IF(WIN32)
SET(EXTRA_SOURCES tpool_win.cc aio_win.cc) SET(EXTRA_SOURCES tpool_win.cc aio_win.cc)
ELSEIF(CMAKE_SYSTEM_NAME STREQUAL "Linux") ELSEIF(CMAKE_SYSTEM_NAME STREQUAL "Linux")
OPTION(WITH_URING "Require that io_uring be used" OFF) OPTION(WITH_URING "Require that io_uring be used" OFF)
OPTION(WITH_LIBAIO "Require that libaio is used, unless uring is there" OFF) OPTION(WITH_LIBAIO "Require that libaio is used" OFF)
IF(WITH_URING) IF(WITH_URING)
SET(URING_REQUIRED REQUIRED) SET(URING_REQUIRED REQUIRED)
ELSEIF(WITH_LIBAIO) ENDIF()
IF(WITH_LIBAIO)
SET(LIBAIO_REQUIRED REQUIRED) SET(LIBAIO_REQUIRED REQUIRED)
ENDIF() ENDIF()
SET(EXTRA_SOURCES)
FIND_PACKAGE(URING QUIET ${URING_REQUIRED}) FIND_PACKAGE(URING QUIET ${URING_REQUIRED})
IF(URING_FOUND) IF(URING_FOUND)
SET(URING_FOUND ${URING_FOUND} PARENT_SCOPE) SET(URING_FOUND ${URING_FOUND} PARENT_SCOPE)
SET(TPOOL_DEFINES "-DHAVE_URING" PARENT_SCOPE) SET(TPOOL_DEFINES "-DHAVE_URING")
ADD_DEFINITIONS(-DHAVE_URING) ADD_DEFINITIONS(-DHAVE_URING)
LINK_LIBRARIES(${URING_LIBRARIES}) LINK_LIBRARIES(${URING_LIBRARIES})
INCLUDE_DIRECTORIES(${URING_INCLUDE_DIRS}) INCLUDE_DIRECTORIES(${URING_INCLUDE_DIRS})
@ -27,16 +29,16 @@ ELSEIF(CMAKE_SYSTEM_NAME STREQUAL "Linux")
IF(HAVE_IO_URING_MLOCK_SIZE) IF(HAVE_IO_URING_MLOCK_SIZE)
SET_SOURCE_FILES_PROPERTIES(aio_liburing.cc PROPERTIES COMPILE_FLAGS "-DHAVE_IO_URING_MLOCK_SIZE") SET_SOURCE_FILES_PROPERTIES(aio_liburing.cc PROPERTIES COMPILE_FLAGS "-DHAVE_IO_URING_MLOCK_SIZE")
ENDIF() ENDIF()
ELSE() ENDIF()
FIND_PACKAGE(LIBAIO QUIET ${LIBAIO_REQUIRED}) FIND_PACKAGE(LIBAIO QUIET ${LIBAIO_REQUIRED})
IF(LIBAIO_FOUND) IF(LIBAIO_FOUND)
SET(TPOOL_DEFINES "-DLINUX_NATIVE_AIO" PARENT_SCOPE) SET(TPOOL_DEFINES ${TPOOL_DEFINES} "-DLINUX_NATIVE_AIO")
ADD_DEFINITIONS(-DLINUX_NATIVE_AIO) ADD_DEFINITIONS(-DLINUX_NATIVE_AIO)
INCLUDE_DIRECTORIES(${LIBAIO_INCLUDE_DIRS}) INCLUDE_DIRECTORIES(${LIBAIO_INCLUDE_DIRS})
LINK_LIBRARIES(${LIBAIO_LIBRARIES}) LINK_LIBRARIES(${LIBAIO_LIBRARIES})
SET(EXTRA_SOURCES aio_linux.cc) SET(EXTRA_SOURCES ${EXTRA_SOURCES} aio_linux.cc)
ENDIF()
ENDIF() ENDIF()
SET(TPOOL_DEFINES ${TPOOL_DEFINES} PARENT_SCOPE)
ENDIF() ENDIF()
ADD_LIBRARY(tpool STATIC ADD_LIBRARY(tpool STATIC

View File

@ -79,8 +79,9 @@ public:
thread_= std::thread(thread_routine, this); thread_= std::thread(thread_routine, this);
} }
const char *get_implementation() const override { return "io_uring"; };
~aio_uring() noexcept ~aio_uring() noexcept override
{ {
{ {
std::lock_guard<std::mutex> _(mutex_); std::lock_guard<std::mutex> _(mutex_);
@ -103,8 +104,8 @@ public:
int submit_io(tpool::aiocb *cb) final int submit_io(tpool::aiocb *cb) final
{ {
cb->iov_base= cb->m_buffer; cb->m_iovec.iov_base= cb->m_buffer;
cb->iov_len= cb->m_len; cb->m_iovec.iov_len= cb->m_len;
// The whole operation since io_uring_get_sqe() and till io_uring_submit() // The whole operation since io_uring_get_sqe() and till io_uring_submit()
// must be atomical. This is because liburing provides thread-unsafe calls. // must be atomical. This is because liburing provides thread-unsafe calls.
@ -112,11 +113,9 @@ public:
io_uring_sqe *sqe= io_uring_get_sqe(&uring_); io_uring_sqe *sqe= io_uring_get_sqe(&uring_);
if (cb->m_opcode == tpool::aio_opcode::AIO_PREAD) if (cb->m_opcode == tpool::aio_opcode::AIO_PREAD)
io_uring_prep_readv(sqe, cb->m_fh, static_cast<struct iovec *>(cb), 1, io_uring_prep_readv(sqe, cb->m_fh, &cb->m_iovec, 1, cb->m_offset);
cb->m_offset);
else else
io_uring_prep_writev(sqe, cb->m_fh, static_cast<struct iovec *>(cb), 1, io_uring_prep_writev(sqe, cb->m_fh, &cb->m_iovec, 1, cb->m_offset);
cb->m_offset);
io_uring_sqe_set_data(sqe, cb); io_uring_sqe_set_data(sqe, cb);
return io_uring_submit(&uring_) == 1 ? 0 : -1; return io_uring_submit(&uring_) == 1 ? 0 : -1;
@ -203,11 +202,27 @@ private:
namespace tpool namespace tpool
{ {
aio *create_linux_aio(thread_pool *pool, int max_aio) #ifdef LINUX_NATIVE_AIO
aio *create_libaio(thread_pool* tp, int max_io);
#endif
aio *create_linux_aio(thread_pool *pool, int max_aio,
aio_implementation implementation)
{ {
switch (implementation) {
case OS_DEFAULT:
case OS_IO_URING:
try { try {
return new aio_uring(pool, max_aio); return new aio_uring(pool, max_aio);
} catch (std::runtime_error& error) { } catch (std::runtime_error&) {
return nullptr;
}
break;
#ifdef LINUX_NATIVE_AIO
case OS_AIO:
return create_libaio(pool, max_aio);
#endif
default:
return nullptr; return nullptr;
} }
} }

View File

@ -13,13 +13,8 @@ You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111 - 1301 USA*/ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111 - 1301 USA*/
#include "tpool_structs.h"
#include "tpool.h" #include "tpool.h"
#include <thread> #include <thread>
# include <atomic>
# include <cstdio>
# include <libaio.h>
#include <sys/syscall.h> #include <sys/syscall.h>
/** /**
@ -58,6 +53,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111 - 1301 USA*/
will make this version of io_getevents return EINVAL. will make this version of io_getevents return EINVAL.
*/ */
static int my_getevents(io_context_t ctx, long min_nr, long nr, io_event *ev) static int my_getevents(io_context_t ctx, long min_nr, long nr, io_event *ev)
noexcept
{ {
int saved_errno= errno; int saved_errno= errno;
int ret= syscall(__NR_io_getevents, reinterpret_cast<long>(ctx), int ret= syscall(__NR_io_getevents, reinterpret_cast<long>(ctx),
@ -81,8 +77,9 @@ static int my_getevents(io_context_t ctx, long min_nr, long nr, io_event *ev)
with io_getevents() and forward io completion callback to with io_getevents() and forward io completion callback to
the worker threadpool. the worker threadpool.
*/ */
namespace tpool namespace
{ {
using namespace tpool;
class aio_linux final : public aio class aio_linux final : public aio
{ {
@ -120,7 +117,7 @@ class aio_linux final : public aio
for (int i= 0; i < ret; i++) for (int i= 0; i < ret; i++)
{ {
const io_event &event= events[i]; const io_event &event= events[i];
aiocb *iocb= static_cast<aiocb*>(event.obj); aiocb *iocb= reinterpret_cast<aiocb*>(event.obj);
if (static_cast<int>(event.res) < 0) if (static_cast<int>(event.res) < 0)
{ {
iocb->m_err= -event.res; iocb->m_err= -event.res;
@ -160,11 +157,10 @@ public:
int submit_io(aiocb *cb) override int submit_io(aiocb *cb) override
{ {
io_prep_pread(static_cast<iocb*>(cb), cb->m_fh, cb->m_buffer, cb->m_len, io_prep_pread(&cb->m_iocb, cb->m_fh, cb->m_buffer, cb->m_len, cb->m_offset);
cb->m_offset);
if (cb->m_opcode != aio_opcode::AIO_PREAD) if (cb->m_opcode != aio_opcode::AIO_PREAD)
cb->aio_lio_opcode= IO_CMD_PWRITE; cb->m_iocb.aio_lio_opcode= IO_CMD_PWRITE;
iocb *icb= static_cast<iocb*>(cb); iocb *icb= &cb->m_iocb;
int ret= io_submit(m_io_ctx, 1, &icb); int ret= io_submit(m_io_ctx, 1, &icb);
if (ret == 1) if (ret == 1)
return 0; return 0;
@ -174,11 +170,20 @@ public:
int bind(native_file_handle&) override { return 0; } int bind(native_file_handle&) override { return 0; }
int unbind(const native_file_handle&) override { return 0; } int unbind(const native_file_handle&) override { return 0; }
const char *get_implementation() const override { return "Linux native AIO"; };
}; };
std::atomic<bool> aio_linux::shutdown_in_progress; std::atomic<bool> aio_linux::shutdown_in_progress;
}
aio *create_linux_aio(thread_pool *pool, int max_io) namespace tpool
{
#ifdef HAVE_URING
aio *create_libaio(thread_pool *pool, int max_io)
#else
aio *create_linux_aio(thread_pool *pool, int max_io, aio_implementation)
#endif
{ {
io_context_t ctx; io_context_t ctx;
memset(&ctx, 0, sizeof ctx); memset(&ctx, 0, sizeof ctx);
@ -187,6 +192,7 @@ aio *create_linux_aio(thread_pool *pool, int max_io)
fprintf(stderr, "io_setup(%d) returned %d\n", max_io, ret); fprintf(stderr, "io_setup(%d) returned %d\n", max_io, ret);
return nullptr; return nullptr;
} }
return new aio_linux(ctx, pool); return new aio_linux(ctx, pool);
} }
} }

View File

@ -154,6 +154,7 @@ public:
int bind(native_file_handle &fd) override { return 0; } int bind(native_file_handle &fd) override { return 0; }
int unbind(const native_file_handle &fd) override { return 0; } int unbind(const native_file_handle &fd) override { return 0; }
const char *get_implementation() const override { return "simulated"; }
}; };
aio *create_simulated_aio(thread_pool *tp) aio *create_simulated_aio(thread_pool *tp)

View File

@ -131,6 +131,7 @@ public:
: GetLastError(); : GetLastError();
} }
int unbind(const native_file_handle& fd) override { return 0; } int unbind(const native_file_handle& fd) override { return 0; }
const char *get_implementation() const override { return "completion ports"; }
}; };
aio* create_win_aio(thread_pool* pool, int max_io) aio* create_win_aio(thread_pool* pool, int max_io)

View File

@ -128,12 +128,21 @@ constexpr size_t MAX_AIO_USERDATA_LEN= 4 * sizeof(void*);
struct aiocb struct aiocb
#ifdef _WIN32 #ifdef _WIN32
:OVERLAPPED :OVERLAPPED
#elif defined LINUX_NATIVE_AIO
:iocb
#elif defined HAVE_URING
:iovec
#endif #endif
{ {
#if defined LINUX_NATIVE_AIO || defined HAVE_URING
union {
# ifdef LINUX_NATIVE_AIO
/** The context between io_submit() and io_getevents();
must be the first data member! */
iocb m_iocb;
# endif
# ifdef HAVE_URING
/** The context between io_uring_submit() and io_uring_wait_cqe() */
iovec m_iovec;
# endif
};
#endif
native_file_handle m_fh; native_file_handle m_fh;
aio_opcode m_opcode; aio_opcode m_opcode;
unsigned long long m_offset; unsigned long long m_offset;
@ -173,6 +182,7 @@ public:
virtual int bind(native_file_handle &fd)= 0; virtual int bind(native_file_handle &fd)= 0;
/** "Unind" file to AIO handler (used on Windows only) */ /** "Unind" file to AIO handler (used on Windows only) */
virtual int unbind(const native_file_handle &fd)= 0; virtual int unbind(const native_file_handle &fd)= 0;
virtual const char *get_implementation() const=0;
virtual ~aio(){}; virtual ~aio(){};
protected: protected:
static void synchronous(aiocb *cb); static void synchronous(aiocb *cb);
@ -202,12 +212,21 @@ class thread_pool;
extern aio *create_simulated_aio(thread_pool *tp); extern aio *create_simulated_aio(thread_pool *tp);
enum aio_implementation
{
OS_DEFAULT
#ifdef __linux__
, OS_IO_URING
, OS_AIO
#endif
};
class thread_pool class thread_pool
{ {
protected: protected:
/* AIO handler */ /* AIO handler */
std::unique_ptr<aio> m_aio; std::unique_ptr<aio> m_aio{};
virtual aio *create_native_aio(int max_io)= 0; virtual aio *create_native_aio(int max_io, aio_implementation)= 0;
public: public:
/** /**
@ -217,10 +236,7 @@ public:
void (*m_worker_init_callback)(void)= [] {}; void (*m_worker_init_callback)(void)= [] {};
void (*m_worker_destroy_callback)(void)= [] {}; void (*m_worker_destroy_callback)(void)= [] {};
thread_pool() thread_pool()= default;
: m_aio()
{
}
virtual void submit_task(task *t)= 0; virtual void submit_task(task *t)= 0;
virtual timer* create_timer(callback_func func, void *data=nullptr) = 0; virtual timer* create_timer(callback_func func, void *data=nullptr) = 0;
void set_thread_callbacks(void (*init)(), void (*destroy)()) void set_thread_callbacks(void (*init)(), void (*destroy)())
@ -230,10 +246,10 @@ public:
m_worker_init_callback= init; m_worker_init_callback= init;
m_worker_destroy_callback= destroy; m_worker_destroy_callback= destroy;
} }
int configure_aio(bool use_native_aio, int max_io) int configure_aio(bool use_native_aio, int max_io, aio_implementation impl)
{ {
if (use_native_aio) if (use_native_aio)
m_aio.reset(create_native_aio(max_io)); m_aio.reset(create_native_aio(max_io, impl));
else else
m_aio.reset(create_simulated_aio(this)); m_aio.reset(create_simulated_aio(this));
return !m_aio ? -1 : 0; return !m_aio ? -1 : 0;
@ -244,7 +260,12 @@ public:
assert(m_aio); assert(m_aio);
if (use_native_aio) if (use_native_aio)
{ {
auto new_aio = create_native_aio(max_io); const aio_implementation impl=
#ifdef LINUX_NATIVE_AIO
!strcmp(get_aio_implementation(), "Linux native AIO") ? OS_AIO :
#endif
OS_DEFAULT;
auto new_aio= create_native_aio(max_io, impl);
if (!new_aio) if (!new_aio)
return -1; return -1;
m_aio.reset(new_aio); m_aio.reset(new_aio);
@ -256,6 +277,10 @@ public:
{ {
m_aio.reset(); m_aio.reset();
} }
/**
  Name of the AIO implementation that is currently in use.

  Precondition: m_aio must be non-null, that is, configure_aio() must
  have succeeded and the AIO handler must not have been reset since.

  @return the string reported by aio::get_implementation()
*/
const char *get_aio_implementation() const
{
  /* Dereferencing an empty std::unique_ptr is undefined behavior;
  catch misuse in debug builds, like the other users of m_aio do. */
  assert(m_aio);
  return m_aio->get_implementation();
}
/** /**
Tweaks how fast worker threads are created, or how often they are signaled. Tweaks how fast worker threads are created, or how often they are signaled.

View File

@ -40,13 +40,13 @@ namespace tpool
#ifdef __linux__ #ifdef __linux__
# if defined(HAVE_URING) || defined(LINUX_NATIVE_AIO) # if defined(HAVE_URING) || defined(LINUX_NATIVE_AIO)
extern aio* create_linux_aio(thread_pool* tp, int max_io); aio *create_linux_aio(thread_pool* tp, int max_io, aio_implementation);
# else # else
aio *create_linux_aio(thread_pool *, int) { return nullptr; }; static aio *create_linux_aio(thread_pool *, int, aio_implementation)
{ return nullptr; }
# endif # endif
#endif #elif defined _WIN32
#ifdef _WIN32 aio *create_win_aio(thread_pool* tp, int max_io);
extern aio* create_win_aio(thread_pool* tp, int max_io);
#endif #endif
static const std::chrono::milliseconds LONG_TASK_DURATION = std::chrono::milliseconds(500); static const std::chrono::milliseconds LONG_TASK_DURATION = std::chrono::milliseconds(500);
@ -299,16 +299,15 @@ public:
void wait_begin() override; void wait_begin() override;
void wait_end() override; void wait_end() override;
void submit_task(task *task) override; void submit_task(task *task) override;
aio *create_native_aio(int max_io) override
{
#ifdef _WIN32 #ifdef _WIN32
return create_win_aio(this, max_io); aio *create_native_aio(int max_io, aio_implementation) override
#elif defined(__linux__) { return create_win_aio(this, max_io); }
return create_linux_aio(this,max_io); #elif defined __linux__
aio *create_native_aio(int max_io, aio_implementation impl) override
{ return create_linux_aio(this, max_io, impl); }
#else #else
return nullptr; aio *create_native_aio(int, aio_implementation) override { return nullptr; }
#endif #endif
}
class timer_generic : public thr_timer_t, public timer class timer_generic : public thr_timer_t, public timer
{ {

View File

@ -206,6 +206,11 @@ class thread_pool_win : public thread_pool
CloseThreadpoolIo(fd.m_ptp_io); CloseThreadpoolIo(fd.m_ptp_io);
return 0; return 0;
} }
/**
  Identify this AIO backend by name.
  @return pointer to a string literal naming the backend
*/
const char *get_implementation() const override
{
  return "ThreadPool";
}
}; };
PTP_POOL m_ptp_pool; PTP_POOL m_ptp_pool;
@ -268,7 +273,7 @@ public:
abort(); abort();
} }
aio *create_native_aio(int max_io) override aio *create_native_aio(int max_io, aio_implementation) override
{ {
return new native_aio(*this, max_io); return new native_aio(*this, max_io);
} }