diff --git a/debian/autobake-deb.sh b/debian/autobake-deb.sh index 04a8c2f5e48..be52280adce 100755 --- a/debian/autobake-deb.sh +++ b/debian/autobake-deb.sh @@ -64,11 +64,10 @@ add_lsb_base_depends() sed -e 's#lsof #lsb-base (>= 3.0-10),\n lsof #' -i debian/control } -replace_uring_with_aio() +remove_uring() { - sed 's/liburing-dev/libaio-dev/g' -i debian/control - sed -e '/-DIGNORE_AIO_CHECK=ON/d' \ - -e '/-DWITH_URING=ON/d' -i debian/rules + sed -e '/liburing-dev/d' -i debian/control + sed -e '/-DWITH_URING=ON/d' -i debian/rules } disable_libfmt() @@ -116,7 +115,7 @@ in # Debian "buster") disable_libfmt - replace_uring_with_aio + remove_uring ;& "bullseye") add_lsb_base_depends @@ -127,7 +126,7 @@ in # so no removal is necessary. if [[ ! "$architecture" =~ amd64|arm64|armel|armhf|i386|mips64el|mipsel|ppc64el|s390x ]] then - replace_uring_with_aio + remove_uring fi ;& "trixie"|"sid") @@ -136,8 +135,8 @@ in ;; # Ubuntu "focal") - replace_uring_with_aio disable_libfmt + remove_uring ;& "jammy"|"kinetic") add_lsb_base_depends diff --git a/debian/rules b/debian/rules index 07d922f705c..cbcecd8d607 100644 --- a/debian/rules +++ b/debian/rules @@ -87,9 +87,6 @@ endif # quality standards in Debian. Also building it requires an extra 4 GB of disk # space which makes native Debian builds fail as the total disk space needed # for MariaDB becomes over 10 GB. Only build CS via autobake-deb.sh. - # - # Note: Don't use '-DWITH_URING=ON' as some Buildbot builders are missing it - # and would fail permanently. 
PATH=$${MYSQL_BUILD_PATH:-"/usr/lib/ccache:/usr/local/bin:/usr/bin:/bin"} \ dh_auto_configure --builddirectory=$(BUILDDIR) -- \ -DCMAKE_BUILD_TYPE=RelWithDebInfo \ @@ -103,6 +100,8 @@ endif -DPLUGIN_AWS_KEY_MANAGEMENT=NO \ -DPLUGIN_COLUMNSTORE=NO \ -DIGNORE_AIO_CHECK=ON \ + -DWITH_URING=ON \ + -DWITH_LIBAIO=ON \ -DDEB=$(DEB_VENDOR) # This is needed, otherwise 'make test' will run before binaries have been built diff --git a/extra/mariabackup/xtrabackup.cc b/extra/mariabackup/xtrabackup.cc index 146709ca5e5..b83b0309b9a 100644 --- a/extra/mariabackup/xtrabackup.cc +++ b/extra/mariabackup/xtrabackup.cc @@ -378,6 +378,10 @@ extern const char *innodb_checksum_algorithm_names[]; extern TYPELIB innodb_checksum_algorithm_typelib; extern const char *innodb_flush_method_names[]; extern TYPELIB innodb_flush_method_typelib; +#ifdef __linux__ +extern const char *innodb_linux_aio_names[]; +extern TYPELIB innodb_linux_aio_typelib; +#endif static const char *binlog_info_values[] = {"off", "lockless", "on", "auto", NullS}; @@ -1334,6 +1338,9 @@ enum options_xtrabackup OPT_INNODB_READ_IO_THREADS, OPT_INNODB_WRITE_IO_THREADS, OPT_INNODB_USE_NATIVE_AIO, +#ifdef __linux__ + OPT_INNODB_LINUX_AIO, +#endif OPT_INNODB_PAGE_SIZE, OPT_INNODB_BUFFER_POOL_FILENAME, OPT_INNODB_LOCK_WAIT_TIMEOUT, @@ -1934,6 +1941,14 @@ struct my_option xb_server_options[] = (G_PTR*) &srv_use_native_aio, (G_PTR*) &srv_use_native_aio, 0, GET_BOOL, NO_ARG, TRUE, 0, 0, 0, 0, 0}, +#ifdef __linux__ + {"innodb_linux_aio", OPT_INNODB_LINUX_AIO, + "Which linux AIO implementation to use, auto (io_uring, failing to aio) or explicit", + (G_PTR*) &srv_linux_aio_method, + (G_PTR*) &srv_linux_aio_method, + &innodb_linux_aio_typelib, GET_ENUM, REQUIRED_ARG, + SRV_LINUX_AIO_AUTO, 0, 0, 0, 0, 0}, +#endif {"innodb_page_size", OPT_INNODB_PAGE_SIZE, "The universal page size of the database.", (G_PTR*) &innobase_page_size, (G_PTR*) &innobase_page_size, 0, @@ -2529,19 +2544,8 @@ static bool innodb_init_param() 
ut_ad(DATA_MYSQL_BINARY_CHARSET_COLL == my_charset_bin.number); -#ifdef _WIN32 +#if defined(_WIN32) || defined(LINUX_NATIVE_AIO) || defined(HAVE_URING) srv_use_native_aio = TRUE; - -#elif defined(LINUX_NATIVE_AIO) - - if (srv_use_native_aio) { - msg("InnoDB: Using Linux native AIO"); - } -#elif defined(HAVE_URING) - - if (srv_use_native_aio) { - msg("InnoDB: Using liburing"); - } #else /* Currently native AIO is supported only on windows and linux and that also when the support is compiled in. In all other @@ -5473,7 +5477,6 @@ fail: xb_fil_io_init(); if (os_aio_init()) { - msg("Error: cannot initialize AIO subsystem"); goto fail; } diff --git a/mysql-test/mariadb-test-run.pl b/mysql-test/mariadb-test-run.pl index 8b3dbf9881f..cf6f4cbb854 100755 --- a/mysql-test/mariadb-test-run.pl +++ b/mysql-test/mariadb-test-run.pl @@ -4548,7 +4548,7 @@ sub extract_warning_lines ($$) { qr|InnoDB: io_setup\(\) attempt|, qr|InnoDB: io_setup\(\) failed with EAGAIN|, qr|io_uring_queue_init\(\) failed with|, - qr|InnoDB: liburing disabled|, + qr|InnoDB: io_uring failed: falling back to libaio|, qr/InnoDB: Failed to set O_DIRECT on file/, qr|setrlimit could not change the size of core files to 'infinity';|, qr|failed to retrieve the MAC address|, diff --git a/mysql-test/suite/sys_vars/r/innodb_linux_aio_basic.result b/mysql-test/suite/sys_vars/r/innodb_linux_aio_basic.result new file mode 100644 index 00000000000..5f72c246d71 --- /dev/null +++ b/mysql-test/suite/sys_vars/r/innodb_linux_aio_basic.result @@ -0,0 +1,21 @@ +select @@global.innodb_linux_aio; +@@global.innodb_linux_aio +auto +select @@session.innodb_linux_aio; +ERROR HY000: Variable 'innodb_linux_aio' is a GLOBAL variable +show global variables like 'innodb_linux_aio'; +Variable_name Value +innodb_linux_aio auto +show session variables like 'innodb_linux_aio'; +Variable_name Value +innodb_linux_aio auto +select * from information_schema.global_variables where variable_name='innodb_linux_aio'; +VARIABLE_NAME VARIABLE_VALUE 
+INNODB_LINUX_AIO auto +select * from information_schema.session_variables where variable_name='innodb_linux_aio'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_LINUX_AIO auto +set global innodb_linux_aio='auto'; +ERROR HY000: Variable 'innodb_linux_aio' is a read only variable +set session innodb_linux_aio='aio'; +ERROR HY000: Variable 'innodb_linux_aio' is a read only variable diff --git a/mysql-test/suite/sys_vars/r/sysvars_innodb.result b/mysql-test/suite/sys_vars/r/sysvars_innodb.result index 5810f23f14e..d34055b161d 100644 --- a/mysql-test/suite/sys_vars/r/sysvars_innodb.result +++ b/mysql-test/suite/sys_vars/r/sysvars_innodb.result @@ -5,6 +5,7 @@ variable_name not in ( 'innodb_evict_tables_on_commit_debug', # one may want to override this 'innodb_use_native_aio', # default value depends on OS 'innodb_log_file_buffering', # only available on Linux and Windows +'innodb_linux_aio', # existence depends on OS 'innodb_buffer_pool_load_pages_abort') # debug build only, and is only for testing order by variable_name; VARIABLE_NAME INNODB_ADAPTIVE_FLUSHING diff --git a/mysql-test/suite/sys_vars/t/innodb_linux_aio_basic.test b/mysql-test/suite/sys_vars/t/innodb_linux_aio_basic.test new file mode 100644 index 00000000000..e5dcdb081cb --- /dev/null +++ b/mysql-test/suite/sys_vars/t/innodb_linux_aio_basic.test @@ -0,0 +1,23 @@ +--source include/have_innodb.inc +--source include/linux.inc +# enum readonly + +# +# show values; +# +select @@global.innodb_linux_aio; +--error ER_INCORRECT_GLOBAL_LOCAL_VAR +select @@session.innodb_linux_aio; +show global variables like 'innodb_linux_aio'; +show session variables like 'innodb_linux_aio'; +select * from information_schema.global_variables where variable_name='innodb_linux_aio'; +select * from information_schema.session_variables where variable_name='innodb_linux_aio'; + +# +# show that it's read-only +# +--error ER_INCORRECT_GLOBAL_LOCAL_VAR +set global innodb_linux_aio='auto'; +--error ER_INCORRECT_GLOBAL_LOCAL_VAR +set session 
innodb_linux_aio='aio'; + diff --git a/mysql-test/suite/sys_vars/t/sysvars_innodb.test b/mysql-test/suite/sys_vars/t/sysvars_innodb.test index 4c5ad1f676c..250eb8b5c8f 100644 --- a/mysql-test/suite/sys_vars/t/sysvars_innodb.test +++ b/mysql-test/suite/sys_vars/t/sysvars_innodb.test @@ -16,5 +16,6 @@ select VARIABLE_NAME, SESSION_VALUE, DEFAULT_VALUE, VARIABLE_SCOPE, VARIABLE_TYP 'innodb_evict_tables_on_commit_debug', # one may want to override this 'innodb_use_native_aio', # default value depends on OS 'innodb_log_file_buffering', # only available on Linux and Windows + 'innodb_linux_aio', # existence depends on OS 'innodb_buffer_pool_load_pages_abort') # debug build only, and is only for testing order by variable_name; diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 29757c28e99..b69ecbeddfc 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -315,6 +315,25 @@ static TYPELIB innodb_stats_method_typelib = { NULL }; +/** Possible values for system variable "innodb_linux_aio" */ +#ifdef __linux__ +const char* innodb_linux_aio_names[] = { + "auto", /* SRV_LINUX_AIO_AUTO */ + "io_uring", /* SRV_LINUX_AIO_IO_URING */ + "aio", /* SRV_LINUX_AIO_LIBAIO */ + NullS +}; + +/** Used to define an enumerate type of the system variable +innodb_linux_aio. Used by mariadb-backup too. 
*/ +TYPELIB innodb_linux_aio_typelib = { + array_elements(innodb_linux_aio_names) - 1, + "innodb_linux_aio_typelib", + innodb_linux_aio_names, + NULL +}; +#endif + /** Possible values of the parameter innodb_checksum_algorithm */ const char* innodb_checksum_algorithm_names[] = { "crc32", @@ -19664,6 +19683,15 @@ static MYSQL_SYSVAR_BOOL(use_native_aio, srv_use_native_aio, "Use native AIO if supported on this platform.", NULL, NULL, TRUE); +#ifdef __linux__ +static MYSQL_SYSVAR_ENUM(linux_aio, srv_linux_aio_method, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "Specifies which Linux AIO implementation should be used." + " Possible value are \"auto\" (default) to select io_uring" + " and fallback to aio, or explicit \"io_uring\" or \"aio\"", + nullptr, nullptr, SRV_LINUX_AIO_AUTO, &innodb_linux_aio_typelib); +#endif + #ifdef HAVE_LIBNUMA static MYSQL_SYSVAR_BOOL(numa_interleave, srv_numa_interleave, PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY, @@ -20059,6 +20087,9 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(tmpdir), MYSQL_SYSVAR(autoinc_lock_mode), MYSQL_SYSVAR(use_native_aio), +#ifdef __linux__ + MYSQL_SYSVAR(linux_aio), +#endif #ifdef HAVE_LIBNUMA MYSQL_SYSVAR(numa_interleave), #endif /* HAVE_LIBNUMA */ diff --git a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h index 689fbdbdbc7..cf13bd9df69 100644 --- a/storage/innobase/include/fil0fil.h +++ b/storage/innobase/include/fil0fil.h @@ -77,6 +77,19 @@ enum srv_flush_t #endif }; +/** Possible values of innodb_linux_aio */ +#ifdef __linux__ +enum srv_linux_aio_t +{ + /** auto, io_uring first and then aio */ + SRV_LINUX_AIO_AUTO, + /** io_uring */ + SRV_LINUX_AIO_IO_URING, + /** aio (libaio interface) */ + SRV_LINUX_AIO_LIBAIO +}; +#endif + /** innodb_flush_method */ extern ulong srv_file_flush_method; diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h index a233a691541..fafb6218002 100644 --- a/storage/innobase/include/srv0srv.h 
+++ b/storage/innobase/include/srv0srv.h @@ -178,6 +178,12 @@ OS (provided we compiled Innobase with it in), otherwise we will use simulated aio. Currently we support native aio on windows and linux */ extern my_bool srv_use_native_aio; + +#ifdef __linux__ +/* innodb_linux_aio: which Linux native AIO method to use (srv_linux_aio_t) */ +extern ulong srv_linux_aio_method; +#endif + extern my_bool srv_numa_interleave; /* Use atomic writes i.e disable doublewrite buffer */ diff --git a/storage/innobase/os/os0file.cc b/storage/innobase/os/os0file.cc index c0bb760cea7..26e9efd245e 100644 --- a/storage/innobase/os/os0file.cc +++ b/storage/innobase/os/os0file.cc @@ -52,10 +52,6 @@ Created 10/21/1995 Heikki Tuuri #include -#ifdef LINUX_NATIVE_AIO -#include -#endif /* LINUX_NATIVE_AIO */ - #ifdef HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE # include # include @@ -3084,132 +3080,6 @@ static void write_io_callback(void *c) write_slots->release(cb); } -#ifdef LINUX_NATIVE_AIO -/** Checks if the system supports native linux aio. On some kernel -versions where native aio is supported it won't work on tmpfs. In such -cases we can't use native aio. - -@return: true if supported, false otherwise. */ -static bool is_linux_native_aio_supported() -{ - File fd; - io_context_t io_ctx; - std::string log_file_path = get_log_file_path(); - - memset(&io_ctx, 0, sizeof(io_ctx)); - if (io_setup(1, &io_ctx)) { - - /* The platform does not support native aio. */ - - return(false); - - } - else if (!srv_read_only_mode) { - - /* Now check if tmpdir supports native aio ops. 
*/ - fd = mysql_tmpfile("ib"); - - if (fd < 0) { - ib::warn() - << "Unable to create temp file to check" - " native AIO support."; - - int ret = io_destroy(io_ctx); - ut_a(ret != -EINVAL); - ut_ad(ret != -EFAULT); - - return(false); - } - } - else { - fd = my_open(log_file_path.c_str(), O_RDONLY | O_CLOEXEC, - MYF(0)); - - if (fd == -1) { - - ib::warn() << "Unable to open \"" << log_file_path - << "\" to check native" - << " AIO read support."; - - int ret = io_destroy(io_ctx); - ut_a(ret != EINVAL); - ut_ad(ret != EFAULT); - - return(false); - } - } - - struct io_event io_event; - - memset(&io_event, 0x0, sizeof(io_event)); - - byte* ptr = static_cast(aligned_malloc(srv_page_size, - srv_page_size)); - - struct iocb iocb; - - /* Suppress valgrind warning. */ - memset(ptr, 0, srv_page_size); - memset(&iocb, 0x0, sizeof(iocb)); - - struct iocb* p_iocb = &iocb; - - if (!srv_read_only_mode) { - - io_prep_pwrite(p_iocb, fd, ptr, srv_page_size, 0); - - } - else { - ut_a(srv_page_size >= 512); - io_prep_pread(p_iocb, fd, ptr, 512, 0); - } - - int err = io_submit(io_ctx, 1, &p_iocb); - - if (err >= 1) { - /* Now collect the submitted IO request. */ - err = io_getevents(io_ctx, 1, 1, &io_event, NULL); - } - - aligned_free(ptr); - my_close(fd, MYF(MY_WME)); - - switch (err) { - case 1: - { - int ret = io_destroy(io_ctx); - ut_a(ret != -EINVAL); - ut_ad(ret != -EFAULT); - - return(true); - } - - case -EINVAL: - case -ENOSYS: - ib::warn() - << "Linux Native AIO not supported. You can either" - " move " - << (srv_read_only_mode ? log_file_path : "tmpdir") - << " to a file system that supports native" - " AIO or you can set innodb_use_native_aio to" - " FALSE to avoid this message."; - - /* fall through. */ - default: - ib::warn() - << "Linux Native AIO check on " - << (srv_read_only_mode ? 
log_file_path : "tmpdir") - << "returned error[" << -err << "]"; - } - - int ret = io_destroy(io_ctx); - ut_a(ret != -EINVAL); - ut_ad(ret != -EFAULT); - - return(false); -} -#endif - int os_aio_init() noexcept { int max_write_events= int(srv_n_write_io_threads * @@ -3217,41 +3087,56 @@ int os_aio_init() noexcept int max_read_events= int(srv_n_read_io_threads * OS_AIO_N_PENDING_IOS_PER_THREAD); int max_events= max_read_events + max_write_events; - int ret; -#if LINUX_NATIVE_AIO - if (srv_use_native_aio && !is_linux_native_aio_supported()) - goto disable; -#endif + int ret= 1; - ret= srv_thread_pool->configure_aio(srv_use_native_aio, max_events); - -#ifdef LINUX_NATIVE_AIO - if (ret) +#if defined __linux__ && (defined HAVE_URING || defined LINUX_NATIVE_AIO) + if (srv_use_native_aio) { - ut_ad(srv_use_native_aio); -disable: - ib::warn() << "Linux Native AIO disabled."; - srv_use_native_aio= false; - ret= srv_thread_pool->configure_aio(false, max_events); + switch (srv_linux_aio_method) { + case SRV_LINUX_AIO_AUTO: + case SRV_LINUX_AIO_IO_URING: +# ifdef HAVE_URING + ret= srv_thread_pool->configure_aio(srv_use_native_aio, max_events, + tpool::OS_IO_URING); +# endif +# ifdef LINUX_NATIVE_AIO +# ifdef HAVE_URING + if (ret && srv_linux_aio_method == SRV_LINUX_AIO_AUTO) + sql_print_warning("InnoDB: io_uring failed: falling back to libaio"); + else + break; + /* fall through */ +# endif /* HAVE_URING */ + case SRV_LINUX_AIO_LIBAIO: + ret= srv_thread_pool->configure_aio(srv_use_native_aio, max_events, + tpool::OS_AIO); +# endif + } + if (ret) + { + srv_use_native_aio= false; + sql_print_warning("InnoDB: native AIO failed: falling back to" + " innodb_use_native_aio=OFF"); + } + else + sql_print_information("InnoDB: Using %s", srv_thread_pool + ->get_aio_implementation()); } -#endif +#endif /* linux */ -#ifdef HAVE_URING if (ret) - { - ut_ad(srv_use_native_aio); - ib::warn() - << "liburing disabled: falling back to innodb_use_native_aio=OFF"; - srv_use_native_aio= false; - ret= 
srv_thread_pool->configure_aio(false, max_events); - } -#endif + ret= srv_thread_pool->configure_aio(srv_use_native_aio, + max_events, + tpool::OS_DEFAULT); if (!ret) { read_slots= new io_slots(max_read_events, srv_n_read_io_threads); write_slots= new io_slots(max_write_events, srv_n_write_io_threads); } + else + sql_print_error("InnoDB: Cannot initialize AIO sub-system"); + return ret; } @@ -3290,8 +3175,8 @@ int os_aio_resize(ulint n_reader_threads, ulint n_writer_threads) noexcept int max_write_events= int(n_writer_threads * OS_AIO_N_PENDING_IOS_PER_THREAD); int events= max_read_events + max_write_events; - /** Do the Linux AIO dance (this will try to create a new - io context with changed max_events ,etc*/ + /* Do the Linux AIO dance (this will try to create a new + io context with changed max_events, etc.) */ int ret= srv_thread_pool->reconfigure_aio(srv_use_native_aio, events); diff --git a/storage/innobase/srv/srv0srv.cc b/storage/innobase/srv/srv0srv.cc index 0a21c69fb38..28ef4d55a8d 100644 --- a/storage/innobase/srv/srv0srv.cc +++ b/storage/innobase/srv/srv0srv.cc @@ -137,6 +137,10 @@ OS (provided we compiled Innobase with it in), otherwise we will use simulated aio we build below with threads. 
Currently we support native aio on windows and linux */ my_bool srv_use_native_aio; +#ifdef __linux__ +/* innodb_linux_aio: which Linux native AIO method to use (srv_linux_aio_t) */ +ulong srv_linux_aio_method; +#endif my_bool srv_numa_interleave; /** copy of innodb_use_atomic_writes; @see innodb_init_params() */ my_bool srv_use_atomic_writes; diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc index ba8b202191d..abf5996615b 100644 --- a/storage/innobase/srv/srv0start.cc +++ b/storage/innobase/srv/srv0start.cc @@ -1287,22 +1287,9 @@ dberr_t srv_start(bool create_new_db) } if (os_aio_init()) { - ib::error() << "Cannot initialize AIO sub-system"; - return(srv_init_abort(DB_ERROR)); } -#ifdef LINUX_NATIVE_AIO - if (srv_use_native_aio) { - ib::info() << "Using Linux native AIO"; - } -#endif -#ifdef HAVE_URING - if (srv_use_native_aio) { - ib::info() << "Using liburing"; - } -#endif - fil_system.create(srv_file_per_table ? 50000 : 5000); if (buf_pool.create()) { diff --git a/tpool/CMakeLists.txt b/tpool/CMakeLists.txt index cf35633b090..85a3fc74f53 100644 --- a/tpool/CMakeLists.txt +++ b/tpool/CMakeLists.txt @@ -3,16 +3,18 @@ IF(WIN32) SET(EXTRA_SOURCES tpool_win.cc aio_win.cc) ELSEIF(CMAKE_SYSTEM_NAME STREQUAL "Linux") OPTION(WITH_URING "Require that io_uring be used" OFF) - OPTION(WITH_LIBAIO "Require that libaio is used, unless uring is there" OFF) + OPTION(WITH_LIBAIO "Require that libaio is used" OFF) IF(WITH_URING) SET(URING_REQUIRED REQUIRED) - ELSEIF(WITH_LIBAIO) + ENDIF() + IF(WITH_LIBAIO) SET(LIBAIO_REQUIRED REQUIRED) ENDIF() + SET(EXTRA_SOURCES) FIND_PACKAGE(URING QUIET ${URING_REQUIRED}) IF(URING_FOUND) SET(URING_FOUND ${URING_FOUND} PARENT_SCOPE) - SET(TPOOL_DEFINES "-DHAVE_URING" PARENT_SCOPE) + SET(TPOOL_DEFINES "-DHAVE_URING") ADD_DEFINITIONS(-DHAVE_URING) LINK_LIBRARIES(${URING_LIBRARIES}) INCLUDE_DIRECTORIES(${URING_INCLUDE_DIRS}) @@ -27,16 +29,16 @@ ELSEIF(CMAKE_SYSTEM_NAME STREQUAL "Linux") IF(HAVE_IO_URING_MLOCK_SIZE) 
SET_SOURCE_FILES_PROPERTIES(aio_liburing.cc PROPERTIES COMPILE_FLAGS "-DHAVE_IO_URING_MLOCK_SIZE") ENDIF() - ELSE() - FIND_PACKAGE(LIBAIO QUIET ${LIBAIO_REQUIRED}) - IF(LIBAIO_FOUND) - SET(TPOOL_DEFINES "-DLINUX_NATIVE_AIO" PARENT_SCOPE) - ADD_DEFINITIONS(-DLINUX_NATIVE_AIO) - INCLUDE_DIRECTORIES(${LIBAIO_INCLUDE_DIRS}) - LINK_LIBRARIES(${LIBAIO_LIBRARIES}) - SET(EXTRA_SOURCES aio_linux.cc) - ENDIF() ENDIF() + FIND_PACKAGE(LIBAIO QUIET ${LIBAIO_REQUIRED}) + IF(LIBAIO_FOUND) + SET(TPOOL_DEFINES ${TPOOL_DEFINES} "-DLINUX_NATIVE_AIO") + ADD_DEFINITIONS(-DLINUX_NATIVE_AIO) + INCLUDE_DIRECTORIES(${LIBAIO_INCLUDE_DIRS}) + LINK_LIBRARIES(${LIBAIO_LIBRARIES}) + SET(EXTRA_SOURCES ${EXTRA_SOURCES} aio_linux.cc) + ENDIF() + SET(TPOOL_DEFINES ${TPOOL_DEFINES} PARENT_SCOPE) ENDIF() ADD_LIBRARY(tpool STATIC diff --git a/tpool/aio_liburing.cc b/tpool/aio_liburing.cc index acc1c6174e2..47a144d636d 100644 --- a/tpool/aio_liburing.cc +++ b/tpool/aio_liburing.cc @@ -79,8 +79,9 @@ public: thread_= std::thread(thread_routine, this); } + const char *get_implementation() const override { return "io_uring"; }; - ~aio_uring() noexcept + ~aio_uring() noexcept override { { std::lock_guard _(mutex_); @@ -103,8 +104,8 @@ public: int submit_io(tpool::aiocb *cb) final { - cb->iov_base= cb->m_buffer; - cb->iov_len= cb->m_len; + cb->m_iovec.iov_base= cb->m_buffer; + cb->m_iovec.iov_len= cb->m_len; // The whole operation since io_uring_get_sqe() and till io_uring_submit() // must be atomical. This is because liburing provides thread-unsafe calls. 
@@ -112,11 +113,9 @@ public: io_uring_sqe *sqe= io_uring_get_sqe(&uring_); if (cb->m_opcode == tpool::aio_opcode::AIO_PREAD) - io_uring_prep_readv(sqe, cb->m_fh, static_cast(cb), 1, - cb->m_offset); + io_uring_prep_readv(sqe, cb->m_fh, &cb->m_iovec, 1, cb->m_offset); else - io_uring_prep_writev(sqe, cb->m_fh, static_cast(cb), 1, - cb->m_offset); + io_uring_prep_writev(sqe, cb->m_fh, &cb->m_iovec, 1, cb->m_offset); io_uring_sqe_set_data(sqe, cb); return io_uring_submit(&uring_) == 1 ? 0 : -1; @@ -203,11 +202,27 @@ private: namespace tpool { -aio *create_linux_aio(thread_pool *pool, int max_aio) +#ifdef LINUX_NATIVE_AIO +aio *create_libaio(thread_pool* tp, int max_io); +#endif + +aio *create_linux_aio(thread_pool *pool, int max_aio, + aio_implementation implementation) { - try { - return new aio_uring(pool, max_aio); - } catch (std::runtime_error& error) { + switch (implementation) { + case OS_DEFAULT: + case OS_IO_URING: + try { + return new aio_uring(pool, max_aio); + } catch (std::runtime_error&) { + return nullptr; + } + break; +#ifdef LINUX_NATIVE_AIO + case OS_AIO: + return create_libaio(pool, max_aio); +#endif + default: return nullptr; } } diff --git a/tpool/aio_linux.cc b/tpool/aio_linux.cc index 0955a6dded4..f8088cda516 100644 --- a/tpool/aio_linux.cc +++ b/tpool/aio_linux.cc @@ -13,14 +13,9 @@ You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111 - 1301 USA*/ -#include "tpool_structs.h" #include "tpool.h" - -# include -# include -# include -# include -# include +#include +#include /** Invoke the io_getevents() system call, without timeout parameter. @@ -58,6 +53,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111 - 1301 USA*/ will make this version of io_getevents return EINVAL. 
*/ static int my_getevents(io_context_t ctx, long min_nr, long nr, io_event *ev) + noexcept { int saved_errno= errno; int ret= syscall(__NR_io_getevents, reinterpret_cast(ctx), @@ -81,8 +77,9 @@ static int my_getevents(io_context_t ctx, long min_nr, long nr, io_event *ev) with io_getevents() and forward io completion callback to the worker threadpool. */ -namespace tpool +namespace { +using namespace tpool; class aio_linux final : public aio { @@ -120,7 +117,7 @@ class aio_linux final : public aio for (int i= 0; i < ret; i++) { const io_event &event= events[i]; - aiocb *iocb= static_cast(event.obj); + aiocb *iocb= reinterpret_cast(event.obj); if (static_cast(event.res) < 0) { iocb->m_err= -event.res; @@ -160,11 +157,10 @@ public: int submit_io(aiocb *cb) override { - io_prep_pread(static_cast(cb), cb->m_fh, cb->m_buffer, cb->m_len, - cb->m_offset); + io_prep_pread(&cb->m_iocb, cb->m_fh, cb->m_buffer, cb->m_len, cb->m_offset); if (cb->m_opcode != aio_opcode::AIO_PREAD) - cb->aio_lio_opcode= IO_CMD_PWRITE; - iocb *icb= static_cast(cb); + cb->m_iocb.aio_lio_opcode= IO_CMD_PWRITE; + iocb *icb= &cb->m_iocb; int ret= io_submit(m_io_ctx, 1, &icb); if (ret == 1) return 0; @@ -174,11 +170,20 @@ public: int bind(native_file_handle&) override { return 0; } int unbind(const native_file_handle&) override { return 0; } + const char *get_implementation() const override { return "Linux native AIO"; }; }; std::atomic aio_linux::shutdown_in_progress; +} -aio *create_linux_aio(thread_pool *pool, int max_io) +namespace tpool +{ + +#ifdef HAVE_URING +aio *create_libaio(thread_pool *pool, int max_io) +#else +aio *create_linux_aio(thread_pool *pool, int max_io, aio_implementation) +#endif { io_context_t ctx; memset(&ctx, 0, sizeof ctx); @@ -187,6 +192,7 @@ aio *create_linux_aio(thread_pool *pool, int max_io) fprintf(stderr, "io_setup(%d) returned %d\n", max_io, ret); return nullptr; } + return new aio_linux(ctx, pool); } } diff --git a/tpool/aio_simulated.cc b/tpool/aio_simulated.cc 
index cf1e7dca4c9..5a18fb03457 100644 --- a/tpool/aio_simulated.cc +++ b/tpool/aio_simulated.cc @@ -154,6 +154,7 @@ public: int bind(native_file_handle &fd) override { return 0; } int unbind(const native_file_handle &fd) override { return 0; } + const char *get_implementation() const override { return "simulated"; } }; aio *create_simulated_aio(thread_pool *tp) diff --git a/tpool/aio_win.cc b/tpool/aio_win.cc index f483e3ca1e1..3c9143a51a0 100644 --- a/tpool/aio_win.cc +++ b/tpool/aio_win.cc @@ -131,6 +131,7 @@ public: : GetLastError(); } int unbind(const native_file_handle& fd) override { return 0; } + const char *get_implementation() const override { return "completion ports"; } }; aio* create_win_aio(thread_pool* pool, int max_io) diff --git a/tpool/tpool.h b/tpool/tpool.h index b33317572e5..f5640c1e3a9 100644 --- a/tpool/tpool.h +++ b/tpool/tpool.h @@ -128,12 +128,21 @@ constexpr size_t MAX_AIO_USERDATA_LEN= 4 * sizeof(void*); struct aiocb #ifdef _WIN32 :OVERLAPPED -#elif defined LINUX_NATIVE_AIO - :iocb -#elif defined HAVE_URING - :iovec #endif { +#if defined LINUX_NATIVE_AIO || defined HAVE_URING + union { +# ifdef LINUX_NATIVE_AIO + /** The context between io_submit() and io_getevents(); + must be the first data member! 
*/ + iocb m_iocb; +# endif +# ifdef HAVE_URING + /** The context between io_uring_submit() and io_uring_wait_cqe() */ + iovec m_iovec; +# endif + }; +#endif native_file_handle m_fh; aio_opcode m_opcode; unsigned long long m_offset; @@ -173,6 +182,7 @@ public: virtual int bind(native_file_handle &fd)= 0; /** "Unind" file to AIO handler (used on Windows only) */ virtual int unbind(const native_file_handle &fd)= 0; + virtual const char *get_implementation() const=0; virtual ~aio(){}; protected: static void synchronous(aiocb *cb); @@ -202,12 +212,21 @@ class thread_pool; extern aio *create_simulated_aio(thread_pool *tp); +enum aio_implementation +{ + OS_DEFAULT +#ifdef __linux__ + , OS_IO_URING + , OS_AIO +#endif +}; + class thread_pool { protected: /* AIO handler */ - std::unique_ptr m_aio; - virtual aio *create_native_aio(int max_io)= 0; + std::unique_ptr m_aio{}; + virtual aio *create_native_aio(int max_io, aio_implementation)= 0; public: /** @@ -217,10 +236,7 @@ public: void (*m_worker_init_callback)(void)= [] {}; void (*m_worker_destroy_callback)(void)= [] {}; - thread_pool() - : m_aio() - { - } + thread_pool()= default; virtual void submit_task(task *t)= 0; virtual timer* create_timer(callback_func func, void *data=nullptr) = 0; void set_thread_callbacks(void (*init)(), void (*destroy)()) @@ -230,10 +246,10 @@ public: m_worker_init_callback= init; m_worker_destroy_callback= destroy; } - int configure_aio(bool use_native_aio, int max_io) + int configure_aio(bool use_native_aio, int max_io, aio_implementation impl) { if (use_native_aio) - m_aio.reset(create_native_aio(max_io)); + m_aio.reset(create_native_aio(max_io, impl)); else m_aio.reset(create_simulated_aio(this)); return !m_aio ? -1 : 0; @@ -244,7 +260,12 @@ public: assert(m_aio); if (use_native_aio) { - auto new_aio = create_native_aio(max_io); + const aio_implementation impl= +#ifdef LINUX_NATIVE_AIO + !strcmp(get_aio_implementation(), "Linux native AIO") ? 
OS_AIO : +#endif + OS_DEFAULT; + auto new_aio= create_native_aio(max_io, impl); if (!new_aio) return -1; m_aio.reset(new_aio); @@ -256,6 +277,10 @@ public: { m_aio.reset(); } + const char *get_aio_implementation() const + { + return m_aio->get_implementation(); + } /** Tweaks how fast worker threads are created, or how often they are signaled. diff --git a/tpool/tpool_generic.cc b/tpool/tpool_generic.cc index 8d7e6a754bc..567fa35bd8d 100644 --- a/tpool/tpool_generic.cc +++ b/tpool/tpool_generic.cc @@ -39,14 +39,14 @@ namespace tpool { #ifdef __linux__ -#if defined(HAVE_URING) || defined(LINUX_NATIVE_AIO) - extern aio* create_linux_aio(thread_pool* tp, int max_io); -#else - aio *create_linux_aio(thread_pool *, int) { return nullptr; }; -#endif -#endif -#ifdef _WIN32 - extern aio* create_win_aio(thread_pool* tp, int max_io); +# if defined(HAVE_URING) || defined(LINUX_NATIVE_AIO) + aio *create_linux_aio(thread_pool* tp, int max_io, aio_implementation); +# else + static aio *create_linux_aio(thread_pool *, int, aio_implementation) + { return nullptr; } +# endif +#elif defined _WIN32 + aio *create_win_aio(thread_pool* tp, int max_io); #endif static const std::chrono::milliseconds LONG_TASK_DURATION = std::chrono::milliseconds(500); @@ -299,16 +299,15 @@ public: void wait_begin() override; void wait_end() override; void submit_task(task *task) override; - aio *create_native_aio(int max_io) override - { #ifdef _WIN32 - return create_win_aio(this, max_io); -#elif defined(__linux__) - return create_linux_aio(this,max_io); + aio *create_native_aio(int max_io, aio_implementation) override + { return create_win_aio(this, max_io); } +#elif defined __linux__ + aio *create_native_aio(int max_io, aio_implementation impl) override + { return create_linux_aio(this, max_io, impl); } #else - return nullptr; + aio *create_native_aio(int, aio_implementation) override { return nullptr; } #endif - } class timer_generic : public thr_timer_t, public timer { diff --git a/tpool/tpool_win.cc 
b/tpool/tpool_win.cc index 30867b8885c..a5539edbdd7 100644 --- a/tpool/tpool_win.cc +++ b/tpool/tpool_win.cc @@ -206,6 +206,11 @@ class thread_pool_win : public thread_pool CloseThreadpoolIo(fd.m_ptp_io); return 0; } + + /** + Expose implementation. + */ + const char *get_implementation() const override { return "ThreadPool"; } }; PTP_POOL m_ptp_pool; @@ -268,7 +273,7 @@ public: abort(); } - aio *create_native_aio(int max_io) override + aio *create_native_aio(int max_io, aio_implementation) override { return new native_aio(*this, max_io); }