MDEV-36234: Add innodb_linux_aio

This controls which Linux AIO implementation to use for
innodb_use_native_aio=ON.

innodb_linux_aio=auto is equivalent to innodb_linux_aio=io_uring when
io_uring is available, and falls back to innodb_linux_aio=aio when it is not.

Debian packaging no longer treats libaio and liburing as mutually exclusive,
so for older Debian or Ubuntu releases, it's now a remove_uring directive.
For more recent releases, add mandatory liburing for consistent packaging.

WITH_LIBAIO is now an independent option from WITH_URING.

is_linux_native_aio_supported(): Remove. This had originally been added in
mysql/mysql-server@0da310b69d in 2012
to fix an issue where io_submit() on CentOS 5.5 would return EINVAL
for a /tmp/#sql*.ibd file associated with CREATE TEMPORARY TABLE.
But, starting with commit 2e814d4702d71a04388386a9f591d14a35980bfe InnoDB
temporary tables will be written to innodb_temp_data_file_path.
The 2012 commit said that the error could occur on "old kernels".
Any GNU/Linux distribution that we currently support should be based
on a newer Linux kernel; for example, Red Hat Enterprise Linux 7
was released in 2014.

This is joint work with Daniel Black and Vladislav Vaintroub.
This commit is contained in:
Marko Mäkelä 2025-05-26 13:53:41 +03:00
parent db188083c3
commit 585531d6c0
22 changed files with 286 additions and 259 deletions

View File

@ -64,11 +64,10 @@ add_lsb_base_depends()
sed -e 's#lsof #lsb-base (>= 3.0-10),\n lsof #' -i debian/control
}
replace_uring_with_aio()
remove_uring()
{
sed 's/liburing-dev/libaio-dev/g' -i debian/control
sed -e '/-DIGNORE_AIO_CHECK=ON/d' \
-e '/-DWITH_URING=ON/d' -i debian/rules
sed -e '/liburing-dev/d' -i debian/control
sed -e '/-DWITH_URING=ON/d' -i debian/rules
}
disable_libfmt()
@ -116,7 +115,7 @@ in
# Debian
"buster")
disable_libfmt
replace_uring_with_aio
remove_uring
;&
"bullseye")
add_lsb_base_depends
@ -127,7 +126,7 @@ in
# so no removal is necessary.
if [[ ! "$architecture" =~ amd64|arm64|armel|armhf|i386|mips64el|mipsel|ppc64el|s390x ]]
then
replace_uring_with_aio
remove_uring
fi
;&
"trixie"|"sid")
@ -136,8 +135,8 @@ in
;;
# Ubuntu
"focal")
replace_uring_with_aio
disable_libfmt
remove_uring
;&
"jammy"|"kinetic")
add_lsb_base_depends

5
debian/rules vendored
View File

@ -87,9 +87,6 @@ endif
# quality standards in Debian. Also building it requires an extra 4 GB of disk
# space which makes native Debian builds fail as the total disk space needed
# for MariaDB becomes over 10 GB. Only build CS via autobake-deb.sh.
#
# Note: Don't use '-DWITH_URING=ON' as some Buildbot builders are missing it
# and would fail permanently.
PATH=$${MYSQL_BUILD_PATH:-"/usr/lib/ccache:/usr/local/bin:/usr/bin:/bin"} \
dh_auto_configure --builddirectory=$(BUILDDIR) -- \
-DCMAKE_BUILD_TYPE=RelWithDebInfo \
@ -103,6 +100,8 @@ endif
-DPLUGIN_AWS_KEY_MANAGEMENT=NO \
-DPLUGIN_COLUMNSTORE=NO \
-DIGNORE_AIO_CHECK=ON \
-DWITH_URING=ON \
-DWITH_LIBAIO=ON \
-DDEB=$(DEB_VENDOR)
# This is needed, otherwise 'make test' will run before binaries have been built

View File

@ -378,6 +378,10 @@ extern const char *innodb_checksum_algorithm_names[];
extern TYPELIB innodb_checksum_algorithm_typelib;
extern const char *innodb_flush_method_names[];
extern TYPELIB innodb_flush_method_typelib;
#ifdef __linux__
extern const char *innodb_linux_aio_names[];
extern TYPELIB innodb_linux_aio_typelib;
#endif
static const char *binlog_info_values[] = {"off", "lockless", "on", "auto",
NullS};
@ -1334,6 +1338,9 @@ enum options_xtrabackup
OPT_INNODB_READ_IO_THREADS,
OPT_INNODB_WRITE_IO_THREADS,
OPT_INNODB_USE_NATIVE_AIO,
#ifdef __linux__
OPT_INNODB_LINUX_AIO,
#endif
OPT_INNODB_PAGE_SIZE,
OPT_INNODB_BUFFER_POOL_FILENAME,
OPT_INNODB_LOCK_WAIT_TIMEOUT,
@ -1934,6 +1941,14 @@ struct my_option xb_server_options[] =
(G_PTR*) &srv_use_native_aio,
(G_PTR*) &srv_use_native_aio, 0, GET_BOOL, NO_ARG,
TRUE, 0, 0, 0, 0, 0},
#ifdef __linux__
{"innodb_linux_aio", OPT_INNODB_LINUX_AIO,
"Which linux AIO implementation to use, auto (io_uring, failing to aio) or explicit",
(G_PTR*) &srv_linux_aio_method,
(G_PTR*) &srv_linux_aio_method,
&innodb_linux_aio_typelib, GET_ENUM, REQUIRED_ARG,
SRV_LINUX_AIO_AUTO, 0, 0, 0, 0, 0},
#endif
{"innodb_page_size", OPT_INNODB_PAGE_SIZE,
"The universal page size of the database.",
(G_PTR*) &innobase_page_size, (G_PTR*) &innobase_page_size, 0,
@ -2529,19 +2544,8 @@ static bool innodb_init_param()
ut_ad(DATA_MYSQL_BINARY_CHARSET_COLL == my_charset_bin.number);
#ifdef _WIN32
#if defined(_WIN32) || defined(LINUX_NATIVE_AIO) || defined(HAVE_URING)
srv_use_native_aio = TRUE;
#elif defined(LINUX_NATIVE_AIO)
if (srv_use_native_aio) {
msg("InnoDB: Using Linux native AIO");
}
#elif defined(HAVE_URING)
if (srv_use_native_aio) {
msg("InnoDB: Using liburing");
}
#else
/* Currently native AIO is supported only on windows and linux
and that also when the support is compiled in. In all other
@ -5473,7 +5477,6 @@ fail:
xb_fil_io_init();
if (os_aio_init()) {
msg("Error: cannot initialize AIO subsystem");
goto fail;
}

View File

@ -4548,7 +4548,7 @@ sub extract_warning_lines ($$) {
qr|InnoDB: io_setup\(\) attempt|,
qr|InnoDB: io_setup\(\) failed with EAGAIN|,
qr|io_uring_queue_init\(\) failed with|,
qr|InnoDB: liburing disabled|,
qr|InnoDB: io_uring failed: falling back to libaio|,
qr/InnoDB: Failed to set O_DIRECT on file/,
qr|setrlimit could not change the size of core files to 'infinity';|,
qr|failed to retrieve the MAC address|,

View File

@ -0,0 +1,21 @@
select @@global.innodb_linux_aio;
@@global.innodb_linux_aio
auto
select @@session.innodb_linux_aio;
ERROR HY000: Variable 'innodb_linux_aio' is a GLOBAL variable
show global variables like 'innodb_linux_aio';
Variable_name Value
innodb_linux_aio auto
show session variables like 'innodb_linux_aio';
Variable_name Value
innodb_linux_aio auto
select * from information_schema.global_variables where variable_name='innodb_linux_aio';
VARIABLE_NAME VARIABLE_VALUE
INNODB_LINUX_AIO auto
select * from information_schema.session_variables where variable_name='innodb_linux_aio';
VARIABLE_NAME VARIABLE_VALUE
INNODB_LINUX_AIO auto
set global innodb_linux_aio='auto';
ERROR HY000: Variable 'innodb_linux_aio' is a read only variable
set session innodb_linux_aio='aio';
ERROR HY000: Variable 'innodb_linux_aio' is a read only variable

View File

@ -5,6 +5,7 @@ variable_name not in (
'innodb_evict_tables_on_commit_debug', # one may want to override this
'innodb_use_native_aio', # default value depends on OS
'innodb_log_file_buffering', # only available on Linux and Windows
'innodb_linux_aio', # existence depends on OS
'innodb_buffer_pool_load_pages_abort') # debug build only, and is only for testing
order by variable_name;
VARIABLE_NAME INNODB_ADAPTIVE_FLUSHING

View File

@ -0,0 +1,23 @@
--source include/have_innodb.inc
--source include/linux.inc
# enum readonly
#
# show values;
#
select @@global.innodb_linux_aio;
--error ER_INCORRECT_GLOBAL_LOCAL_VAR
select @@session.innodb_linux_aio;
show global variables like 'innodb_linux_aio';
show session variables like 'innodb_linux_aio';
select * from information_schema.global_variables where variable_name='innodb_linux_aio';
select * from information_schema.session_variables where variable_name='innodb_linux_aio';
#
# show that it's read-only
#
--error ER_INCORRECT_GLOBAL_LOCAL_VAR
set global innodb_linux_aio='auto';
--error ER_INCORRECT_GLOBAL_LOCAL_VAR
set session innodb_linux_aio='aio';

View File

@ -16,5 +16,6 @@ select VARIABLE_NAME, SESSION_VALUE, DEFAULT_VALUE, VARIABLE_SCOPE, VARIABLE_TYP
'innodb_evict_tables_on_commit_debug', # one may want to override this
'innodb_use_native_aio', # default value depends on OS
'innodb_log_file_buffering', # only available on Linux and Windows
'innodb_linux_aio', # existence depends on OS
'innodb_buffer_pool_load_pages_abort') # debug build only, and is only for testing
order by variable_name;

View File

@ -315,6 +315,25 @@ static TYPELIB innodb_stats_method_typelib = {
NULL
};
/** Names accepted for the system variable "innodb_linux_aio" (Linux only).
The trailing comment on each entry names the srv_linux_aio_t enumerator
that the entry stands for; keep the order of this list and of that enum
in sync. The list is terminated by NullS. */
#ifdef __linux__
const char* innodb_linux_aio_names[] = {
"auto", /* SRV_LINUX_AIO_AUTO */
"io_uring", /* SRV_LINUX_AIO_IO_URING */
"aio", /* SRV_LINUX_AIO_LIBAIO */
NullS
};
/** Enumeration type descriptor of the system variable innodb_linux_aio,
built on innodb_linux_aio_names. The "- 1" leaves the NullS terminator
out of the element count. Also used by mariadb-backup. */
TYPELIB innodb_linux_aio_typelib = {
array_elements(innodb_linux_aio_names) - 1,
"innodb_linux_aio_typelib",
innodb_linux_aio_names,
NULL
};
#endif
/** Possible values of the parameter innodb_checksum_algorithm */
const char* innodb_checksum_algorithm_names[] = {
"crc32",
@ -19664,6 +19683,15 @@ static MYSQL_SYSVAR_BOOL(use_native_aio, srv_use_native_aio,
"Use native AIO if supported on this platform.",
NULL, NULL, TRUE);
#ifdef __linux__
/** innodb_linux_aio: read-only parameter (PLUGIN_VAR_READONLY) that
requires an argument (PLUGIN_VAR_RQCMDARG). It is stored in
srv_linux_aio_method, defaults to SRV_LINUX_AIO_AUTO, and takes its
permitted values from innodb_linux_aio_typelib. */
static MYSQL_SYSVAR_ENUM(linux_aio, srv_linux_aio_method,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Specifies which Linux AIO implementation should be used."
  " Possible values are \"auto\" (default) to select io_uring"
  " and fall back to aio, or explicit \"io_uring\" or \"aio\"",
  nullptr, nullptr, SRV_LINUX_AIO_AUTO, &innodb_linux_aio_typelib);
#endif
#ifdef HAVE_LIBNUMA
static MYSQL_SYSVAR_BOOL(numa_interleave, srv_numa_interleave,
PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
@ -20059,6 +20087,9 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(tmpdir),
MYSQL_SYSVAR(autoinc_lock_mode),
MYSQL_SYSVAR(use_native_aio),
#ifdef __linux__
MYSQL_SYSVAR(linux_aio),
#endif
#ifdef HAVE_LIBNUMA
MYSQL_SYSVAR(numa_interleave),
#endif /* HAVE_LIBNUMA */

View File

@ -77,6 +77,19 @@ enum srv_flush_t
#endif
};
/** Possible values of innodb_linux_aio (Linux only).
The enumerator order matters: the value names of the system variable
are listed in the same order elsewhere, so any change here must be
mirrored there. */
#ifdef __linux__
enum srv_linux_aio_t
{
/** auto: try io_uring first and then fall back to aio */
SRV_LINUX_AIO_AUTO,
/** io_uring, requested explicitly */
SRV_LINUX_AIO_IO_URING,
/** aio (the libaio interface), requested explicitly */
SRV_LINUX_AIO_LIBAIO
};
#endif
/** innodb_flush_method */
extern ulong srv_file_flush_method;

View File

@ -178,6 +178,12 @@ OS (provided we compiled Innobase with it in), otherwise we will
use simulated aio.
Currently we support native aio on windows and linux */
extern my_bool srv_use_native_aio;
#ifdef __linux__
/* Selects which Linux native AIO method to use when native AIO is
enabled; holds the value of the innodb_linux_aio setting */
extern ulong srv_linux_aio_method;
#endif
extern my_bool srv_numa_interleave;
/* Use atomic writes i.e disable doublewrite buffer */

View File

@ -52,10 +52,6 @@ Created 10/21/1995 Heikki Tuuri
#include <tpool_structs.h>
#ifdef LINUX_NATIVE_AIO
#include <libaio.h>
#endif /* LINUX_NATIVE_AIO */
#ifdef HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE
# include <fcntl.h>
# include <linux/falloc.h>
@ -3084,132 +3080,6 @@ static void write_io_callback(void *c)
write_slots->release(cb);
}
#ifdef LINUX_NATIVE_AIO
/** Checks if the system supports native linux aio. On some kernel
versions where native aio is supported it won't work on tmpfs. In such
cases we can't use native aio.
@return: true if supported, false otherwise. */
static bool is_linux_native_aio_supported()
{
File fd;
io_context_t io_ctx;
std::string log_file_path = get_log_file_path();
memset(&io_ctx, 0, sizeof(io_ctx));
if (io_setup(1, &io_ctx)) {
/* The platform does not support native aio. */
return(false);
}
else if (!srv_read_only_mode) {
/* Now check if tmpdir supports native aio ops. */
fd = mysql_tmpfile("ib");
if (fd < 0) {
ib::warn()
<< "Unable to create temp file to check"
" native AIO support.";
int ret = io_destroy(io_ctx);
ut_a(ret != -EINVAL);
ut_ad(ret != -EFAULT);
return(false);
}
}
else {
fd = my_open(log_file_path.c_str(), O_RDONLY | O_CLOEXEC,
MYF(0));
if (fd == -1) {
ib::warn() << "Unable to open \"" << log_file_path
<< "\" to check native"
<< " AIO read support.";
int ret = io_destroy(io_ctx);
ut_a(ret != EINVAL);
ut_ad(ret != EFAULT);
return(false);
}
}
struct io_event io_event;
memset(&io_event, 0x0, sizeof(io_event));
byte* ptr = static_cast<byte*>(aligned_malloc(srv_page_size,
srv_page_size));
struct iocb iocb;
/* Suppress valgrind warning. */
memset(ptr, 0, srv_page_size);
memset(&iocb, 0x0, sizeof(iocb));
struct iocb* p_iocb = &iocb;
if (!srv_read_only_mode) {
io_prep_pwrite(p_iocb, fd, ptr, srv_page_size, 0);
}
else {
ut_a(srv_page_size >= 512);
io_prep_pread(p_iocb, fd, ptr, 512, 0);
}
int err = io_submit(io_ctx, 1, &p_iocb);
if (err >= 1) {
/* Now collect the submitted IO request. */
err = io_getevents(io_ctx, 1, 1, &io_event, NULL);
}
aligned_free(ptr);
my_close(fd, MYF(MY_WME));
switch (err) {
case 1:
{
int ret = io_destroy(io_ctx);
ut_a(ret != -EINVAL);
ut_ad(ret != -EFAULT);
return(true);
}
case -EINVAL:
case -ENOSYS:
ib::warn()
<< "Linux Native AIO not supported. You can either"
" move "
<< (srv_read_only_mode ? log_file_path : "tmpdir")
<< " to a file system that supports native"
" AIO or you can set innodb_use_native_aio to"
" FALSE to avoid this message.";
/* fall through. */
default:
ib::warn()
<< "Linux Native AIO check on "
<< (srv_read_only_mode ? log_file_path : "tmpdir")
<< "returned error[" << -err << "]";
}
int ret = io_destroy(io_ctx);
ut_a(ret != -EINVAL);
ut_ad(ret != -EFAULT);
return(false);
}
#endif
int os_aio_init() noexcept
{
int max_write_events= int(srv_n_write_io_threads *
@ -3217,41 +3087,56 @@ int os_aio_init() noexcept
int max_read_events= int(srv_n_read_io_threads *
OS_AIO_N_PENDING_IOS_PER_THREAD);
int max_events= max_read_events + max_write_events;
int ret;
#if LINUX_NATIVE_AIO
if (srv_use_native_aio && !is_linux_native_aio_supported())
goto disable;
#endif
int ret= 1;
ret= srv_thread_pool->configure_aio(srv_use_native_aio, max_events);
#ifdef LINUX_NATIVE_AIO
if (ret)
#if defined __linux__ && (defined HAVE_URING || defined LINUX_NATIVE_AIO)
if (srv_use_native_aio)
{
ut_ad(srv_use_native_aio);
disable:
ib::warn() << "Linux Native AIO disabled.";
srv_use_native_aio= false;
ret= srv_thread_pool->configure_aio(false, max_events);
switch (srv_linux_aio_method) {
case SRV_LINUX_AIO_AUTO:
case SRV_LINUX_AIO_IO_URING:
# ifdef HAVE_URING
ret= srv_thread_pool->configure_aio(srv_use_native_aio, max_events,
tpool::OS_IO_URING);
# endif
# ifdef LINUX_NATIVE_AIO
# ifdef HAVE_URING
if (ret && srv_linux_aio_method == SRV_LINUX_AIO_AUTO)
sql_print_warning("InnoDB: io_uring failed: falling back to libaio");
else
break;
/* fallthrough */
# endif /* HAVE_URING */
case SRV_LINUX_AIO_LIBAIO:
ret= srv_thread_pool->configure_aio(srv_use_native_aio, max_events,
tpool::OS_AIO);
# endif
}
if (ret)
{
srv_use_native_aio= false;
sql_print_warning("InnoDB: native AIO failed: falling back to"
" innodb_use_native_aio=OFF");
}
else
sql_print_information("InnoDB: Using %s", srv_thread_pool
->get_aio_implementation());
}
#endif
#endif /* linux */
#ifdef HAVE_URING
if (ret)
{
ut_ad(srv_use_native_aio);
ib::warn()
<< "liburing disabled: falling back to innodb_use_native_aio=OFF";
srv_use_native_aio= false;
ret= srv_thread_pool->configure_aio(false, max_events);
}
#endif
ret= srv_thread_pool->configure_aio(srv_use_native_aio,
max_events,
tpool::OS_DEFAULT);
if (!ret)
{
read_slots= new io_slots(max_read_events, srv_n_read_io_threads);
write_slots= new io_slots(max_write_events, srv_n_write_io_threads);
}
else
sql_print_error("InnoDB: Cannot initialize AIO sub-system");
return ret;
}
@ -3290,8 +3175,8 @@ int os_aio_resize(ulint n_reader_threads, ulint n_writer_threads) noexcept
int max_write_events= int(n_writer_threads * OS_AIO_N_PENDING_IOS_PER_THREAD);
int events= max_read_events + max_write_events;
/** Do the Linux AIO dance (this will try to create a new
io context with changed max_events ,etc*/
/* Do the Linux AIO dance (this will try to create a new
io context with changed max_events, etc.) */
int ret= srv_thread_pool->reconfigure_aio(srv_use_native_aio, events);

View File

@ -137,6 +137,10 @@ OS (provided we compiled Innobase with it in), otherwise we will
use simulated aio we build below with threads.
Currently we support native aio on windows and linux */
my_bool srv_use_native_aio;
#ifdef __linux__
/* Selects which Linux native AIO method to use when native AIO is
enabled; holds the value of the innodb_linux_aio setting */
ulong srv_linux_aio_method;
#endif
my_bool srv_numa_interleave;
/** copy of innodb_use_atomic_writes; @see innodb_init_params() */
my_bool srv_use_atomic_writes;

View File

@ -1287,22 +1287,9 @@ dberr_t srv_start(bool create_new_db)
}
if (os_aio_init()) {
ib::error() << "Cannot initialize AIO sub-system";
return(srv_init_abort(DB_ERROR));
}
#ifdef LINUX_NATIVE_AIO
if (srv_use_native_aio) {
ib::info() << "Using Linux native AIO";
}
#endif
#ifdef HAVE_URING
if (srv_use_native_aio) {
ib::info() << "Using liburing";
}
#endif
fil_system.create(srv_file_per_table ? 50000 : 5000);
if (buf_pool.create()) {

View File

@ -3,16 +3,18 @@ IF(WIN32)
SET(EXTRA_SOURCES tpool_win.cc aio_win.cc)
ELSEIF(CMAKE_SYSTEM_NAME STREQUAL "Linux")
OPTION(WITH_URING "Require that io_uring be used" OFF)
OPTION(WITH_LIBAIO "Require that libaio is used, unless uring is there" OFF)
OPTION(WITH_LIBAIO "Require that libaio is used" OFF)
IF(WITH_URING)
SET(URING_REQUIRED REQUIRED)
ELSEIF(WITH_LIBAIO)
ENDIF()
IF(WITH_LIBAIO)
SET(LIBAIO_REQUIRED REQUIRED)
ENDIF()
SET(EXTRA_SOURCES)
FIND_PACKAGE(URING QUIET ${URING_REQUIRED})
IF(URING_FOUND)
SET(URING_FOUND ${URING_FOUND} PARENT_SCOPE)
SET(TPOOL_DEFINES "-DHAVE_URING" PARENT_SCOPE)
SET(TPOOL_DEFINES "-DHAVE_URING")
ADD_DEFINITIONS(-DHAVE_URING)
LINK_LIBRARIES(${URING_LIBRARIES})
INCLUDE_DIRECTORIES(${URING_INCLUDE_DIRS})
@ -27,16 +29,16 @@ ELSEIF(CMAKE_SYSTEM_NAME STREQUAL "Linux")
IF(HAVE_IO_URING_MLOCK_SIZE)
SET_SOURCE_FILES_PROPERTIES(aio_liburing.cc PROPERTIES COMPILE_FLAGS "-DHAVE_IO_URING_MLOCK_SIZE")
ENDIF()
ELSE()
FIND_PACKAGE(LIBAIO QUIET ${LIBAIO_REQUIRED})
IF(LIBAIO_FOUND)
SET(TPOOL_DEFINES "-DLINUX_NATIVE_AIO" PARENT_SCOPE)
ADD_DEFINITIONS(-DLINUX_NATIVE_AIO)
INCLUDE_DIRECTORIES(${LIBAIO_INCLUDE_DIRS})
LINK_LIBRARIES(${LIBAIO_LIBRARIES})
SET(EXTRA_SOURCES aio_linux.cc)
ENDIF()
ENDIF()
FIND_PACKAGE(LIBAIO QUIET ${LIBAIO_REQUIRED})
IF(LIBAIO_FOUND)
SET(TPOOL_DEFINES ${TPOOL_DEFINES} "-DLINUX_NATIVE_AIO")
ADD_DEFINITIONS(-DLINUX_NATIVE_AIO)
INCLUDE_DIRECTORIES(${LIBAIO_INCLUDE_DIRS})
LINK_LIBRARIES(${LIBAIO_LIBRARIES})
SET(EXTRA_SOURCES ${EXTRA_SOURCES} aio_linux.cc)
ENDIF()
SET(TPOOL_DEFINES ${TPOOL_DEFINES} PARENT_SCOPE)
ENDIF()
ADD_LIBRARY(tpool STATIC

View File

@ -79,8 +79,9 @@ public:
thread_= std::thread(thread_routine, this);
}
const char *get_implementation() const override { return "io_uring"; };
~aio_uring() noexcept
~aio_uring() noexcept override
{
{
std::lock_guard<std::mutex> _(mutex_);
@ -103,8 +104,8 @@ public:
int submit_io(tpool::aiocb *cb) final
{
cb->iov_base= cb->m_buffer;
cb->iov_len= cb->m_len;
cb->m_iovec.iov_base= cb->m_buffer;
cb->m_iovec.iov_len= cb->m_len;
// The whole operation since io_uring_get_sqe() and till io_uring_submit()
// must be atomical. This is because liburing provides thread-unsafe calls.
@ -112,11 +113,9 @@ public:
io_uring_sqe *sqe= io_uring_get_sqe(&uring_);
if (cb->m_opcode == tpool::aio_opcode::AIO_PREAD)
io_uring_prep_readv(sqe, cb->m_fh, static_cast<struct iovec *>(cb), 1,
cb->m_offset);
io_uring_prep_readv(sqe, cb->m_fh, &cb->m_iovec, 1, cb->m_offset);
else
io_uring_prep_writev(sqe, cb->m_fh, static_cast<struct iovec *>(cb), 1,
cb->m_offset);
io_uring_prep_writev(sqe, cb->m_fh, &cb->m_iovec, 1, cb->m_offset);
io_uring_sqe_set_data(sqe, cb);
return io_uring_submit(&uring_) == 1 ? 0 : -1;
@ -203,11 +202,27 @@ private:
namespace tpool
{
aio *create_linux_aio(thread_pool *pool, int max_aio)
#ifdef LINUX_NATIVE_AIO
aio *create_libaio(thread_pool* tp, int max_io);
#endif
aio *create_linux_aio(thread_pool *pool, int max_aio,
aio_implementation implementation)
{
try {
return new aio_uring(pool, max_aio);
} catch (std::runtime_error& error) {
switch (implementation) {
case OS_DEFAULT:
case OS_IO_URING:
try {
return new aio_uring(pool, max_aio);
} catch (std::runtime_error&) {
return nullptr;
}
break;
#ifdef LINUX_NATIVE_AIO
case OS_AIO:
return create_libaio(pool, max_aio);
#endif
default:
return nullptr;
}
}

View File

@ -13,14 +13,9 @@ You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111 - 1301 USA*/
#include "tpool_structs.h"
#include "tpool.h"
# include <thread>
# include <atomic>
# include <cstdio>
# include <libaio.h>
# include <sys/syscall.h>
#include <thread>
#include <sys/syscall.h>
/**
Invoke the io_getevents() system call, without timeout parameter.
@ -58,6 +53,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111 - 1301 USA*/
will make this version of io_getevents return EINVAL.
*/
static int my_getevents(io_context_t ctx, long min_nr, long nr, io_event *ev)
noexcept
{
int saved_errno= errno;
int ret= syscall(__NR_io_getevents, reinterpret_cast<long>(ctx),
@ -81,8 +77,9 @@ static int my_getevents(io_context_t ctx, long min_nr, long nr, io_event *ev)
with io_getevents() and forward io completion callback to
the worker threadpool.
*/
namespace tpool
namespace
{
using namespace tpool;
class aio_linux final : public aio
{
@ -120,7 +117,7 @@ class aio_linux final : public aio
for (int i= 0; i < ret; i++)
{
const io_event &event= events[i];
aiocb *iocb= static_cast<aiocb*>(event.obj);
aiocb *iocb= reinterpret_cast<aiocb*>(event.obj);
if (static_cast<int>(event.res) < 0)
{
iocb->m_err= -event.res;
@ -160,11 +157,10 @@ public:
int submit_io(aiocb *cb) override
{
io_prep_pread(static_cast<iocb*>(cb), cb->m_fh, cb->m_buffer, cb->m_len,
cb->m_offset);
io_prep_pread(&cb->m_iocb, cb->m_fh, cb->m_buffer, cb->m_len, cb->m_offset);
if (cb->m_opcode != aio_opcode::AIO_PREAD)
cb->aio_lio_opcode= IO_CMD_PWRITE;
iocb *icb= static_cast<iocb*>(cb);
cb->m_iocb.aio_lio_opcode= IO_CMD_PWRITE;
iocb *icb= &cb->m_iocb;
int ret= io_submit(m_io_ctx, 1, &icb);
if (ret == 1)
return 0;
@ -174,11 +170,20 @@ public:
int bind(native_file_handle&) override { return 0; }
int unbind(const native_file_handle&) override { return 0; }
const char *get_implementation() const override { return "Linux native AIO"; };
};
std::atomic<bool> aio_linux::shutdown_in_progress;
}
aio *create_linux_aio(thread_pool *pool, int max_io)
namespace tpool
{
#ifdef HAVE_URING
aio *create_libaio(thread_pool *pool, int max_io)
#else
aio *create_linux_aio(thread_pool *pool, int max_io, aio_implementation)
#endif
{
io_context_t ctx;
memset(&ctx, 0, sizeof ctx);
@ -187,6 +192,7 @@ aio *create_linux_aio(thread_pool *pool, int max_io)
fprintf(stderr, "io_setup(%d) returned %d\n", max_io, ret);
return nullptr;
}
return new aio_linux(ctx, pool);
}
}

View File

@ -154,6 +154,7 @@ public:
int bind(native_file_handle &fd) override { return 0; }
int unbind(const native_file_handle &fd) override { return 0; }
const char *get_implementation() const override { return "simulated"; }
};
aio *create_simulated_aio(thread_pool *tp)

View File

@ -131,6 +131,7 @@ public:
: GetLastError();
}
int unbind(const native_file_handle& fd) override { return 0; }
const char *get_implementation() const override { return "completion ports"; }
};
aio* create_win_aio(thread_pool* pool, int max_io)

View File

@ -128,12 +128,21 @@ constexpr size_t MAX_AIO_USERDATA_LEN= 4 * sizeof(void*);
struct aiocb
#ifdef _WIN32
:OVERLAPPED
#elif defined LINUX_NATIVE_AIO
:iocb
#elif defined HAVE_URING
:iovec
#endif
{
#if defined LINUX_NATIVE_AIO || defined HAVE_URING
union {
# ifdef LINUX_NATIVE_AIO
/** The context between io_submit() and io_getevents();
must be the first data member! */
iocb m_iocb;
# endif
# ifdef HAVE_URING
/** The context between io_uring_submit() and io_uring_wait_cqe() */
iovec m_iovec;
# endif
};
#endif
native_file_handle m_fh;
aio_opcode m_opcode;
unsigned long long m_offset;
@ -173,6 +182,7 @@ public:
virtual int bind(native_file_handle &fd)= 0;
/** "Unbind" file from AIO handler (used on Windows only) */
virtual int unbind(const native_file_handle &fd)= 0;
virtual const char *get_implementation() const=0;
virtual ~aio(){};
protected:
static void synchronous(aiocb *cb);
@ -202,12 +212,21 @@ class thread_pool;
extern aio *create_simulated_aio(thread_pool *tp);
/** Selector for the native AIO implementation, passed to
thread_pool::configure_aio() and on to create_native_aio().
The Linux-specific values exist only when __linux__ is defined. */
enum aio_implementation
{
/** No particular implementation requested */
OS_DEFAULT
#ifdef __linux__
/** Request the io_uring based implementation */
, OS_IO_URING
/** Request the libaio based implementation */
, OS_AIO
#endif
};
class thread_pool
{
protected:
/* AIO handler */
std::unique_ptr<aio> m_aio;
virtual aio *create_native_aio(int max_io)= 0;
std::unique_ptr<aio> m_aio{};
virtual aio *create_native_aio(int max_io, aio_implementation)= 0;
public:
/**
@ -217,10 +236,7 @@ public:
void (*m_worker_init_callback)(void)= [] {};
void (*m_worker_destroy_callback)(void)= [] {};
thread_pool()
: m_aio()
{
}
thread_pool()= default;
virtual void submit_task(task *t)= 0;
virtual timer* create_timer(callback_func func, void *data=nullptr) = 0;
void set_thread_callbacks(void (*init)(), void (*destroy)())
@ -230,10 +246,10 @@ public:
m_worker_init_callback= init;
m_worker_destroy_callback= destroy;
}
int configure_aio(bool use_native_aio, int max_io)
int configure_aio(bool use_native_aio, int max_io, aio_implementation impl)
{
if (use_native_aio)
m_aio.reset(create_native_aio(max_io));
m_aio.reset(create_native_aio(max_io, impl));
else
m_aio.reset(create_simulated_aio(this));
return !m_aio ? -1 : 0;
@ -244,7 +260,12 @@ public:
assert(m_aio);
if (use_native_aio)
{
auto new_aio = create_native_aio(max_io);
const aio_implementation impl=
#ifdef LINUX_NATIVE_AIO
!strcmp(get_aio_implementation(), "Linux native AIO") ? OS_AIO :
#endif
OS_DEFAULT;
auto new_aio= create_native_aio(max_io, impl);
if (!new_aio)
return -1;
m_aio.reset(new_aio);
@ -256,6 +277,10 @@ public:
{
m_aio.reset();
}
/**
Name of the AIO implementation currently in use, as reported by the
AIO handler's get_implementation().

m_aio is dereferenced without a check, so this must only be called
while an AIO handler is configured.
*/
const char *get_aio_implementation() const
{
return m_aio->get_implementation();
}
/**
Tweaks how fast worker threads are created, or how often they are signaled.

View File

@ -39,14 +39,14 @@ namespace tpool
{
#ifdef __linux__
#if defined(HAVE_URING) || defined(LINUX_NATIVE_AIO)
extern aio* create_linux_aio(thread_pool* tp, int max_io);
#else
aio *create_linux_aio(thread_pool *, int) { return nullptr; };
#endif
#endif
#ifdef _WIN32
extern aio* create_win_aio(thread_pool* tp, int max_io);
# if defined(HAVE_URING) || defined(LINUX_NATIVE_AIO)
aio *create_linux_aio(thread_pool* tp, int max_io, aio_implementation);
# else
static aio *create_linux_aio(thread_pool *, int, aio_implementation)
{ return nullptr; }
# endif
#elif defined _WIN32
aio *create_win_aio(thread_pool* tp, int max_io);
#endif
static const std::chrono::milliseconds LONG_TASK_DURATION = std::chrono::milliseconds(500);
@ -299,16 +299,15 @@ public:
void wait_begin() override;
void wait_end() override;
void submit_task(task *task) override;
aio *create_native_aio(int max_io) override
{
#ifdef _WIN32
return create_win_aio(this, max_io);
#elif defined(__linux__)
return create_linux_aio(this,max_io);
aio *create_native_aio(int max_io, aio_implementation) override
{ return create_win_aio(this, max_io); }
#elif defined __linux__
aio *create_native_aio(int max_io, aio_implementation impl) override
{ return create_linux_aio(this, max_io, impl); }
#else
return nullptr;
aio *create_native_aio(int, aio_implementation) override { return nullptr; }
#endif
}
class timer_generic : public thr_timer_t, public timer
{

View File

@ -206,6 +206,11 @@ class thread_pool_win : public thread_pool
CloseThreadpoolIo(fd.m_ptp_io);
return 0;
}
/**
Expose implementation.
*/
const char *get_implementation() const override { return "ThreadPool"; }
};
PTP_POOL m_ptp_pool;
@ -268,7 +273,7 @@ public:
abort();
}
aio *create_native_aio(int max_io) override
aio *create_native_aio(int max_io, aio_implementation) override
{
return new native_aio(*this, max_io);
}