From d510f80549121ac2a07588b5ee9d0252e2b3b60f Mon Sep 17 00:00:00 2001 From: Vladislav Vaintroub Date: Sat, 10 Feb 2024 00:30:47 +0100 Subject: [PATCH] MDEV-33482: Optimize WolfSSL for improved performance - Use "new" math library WOLFSSL_SP_MATH_ALL, which is now promoted by WolfSSL for faster performance. "fastmath" we used previously is going to be deprecated, it was not really always fast. - Optimize common RSA math operations with WOLFSSL_HAVE_SP_RSA - Incorporate assembly optimizations, currently for Intel x64 only This patch significantly reduces execution time for SSL tests like main.ssl-big and main.ssl_connect, which now run 2 to 3 times faster. Notably, when this patch is applied to 11.4, server startup in with ephemeral certificates becomes approximately 10x faster due to optimized wolfSSL_EVP_PKEY_keygen(). Additionally, refactored WolfSSL by removing old workarounds and consolidating wolfssl and wolfcrypt into a single library wolfssl, just like it was done in WolfSSL's own CMake. --- cmake/os/Windows.cmake | 8 ++ cmake/ssl.cmake | 2 +- extra/wolfssl/CMakeLists.txt | 192 ++++++++++++------------------- extra/wolfssl/user_settings.h.in | 18 +-- 4 files changed, 93 insertions(+), 127 deletions(-) diff --git a/cmake/os/Windows.cmake b/cmake/os/Windows.cmake index f47d4ebc98d..1a853349ae5 100644 --- a/cmake/os/Windows.cmake +++ b/cmake/os/Windows.cmake @@ -24,6 +24,14 @@ INCLUDE (CheckCSourceRuns) INCLUDE (CheckSymbolExists) INCLUDE (CheckTypeSize) +IF(MSVC) + IF(CMAKE_CXX_COMPILER_ARCHITECTURE_ID STREQUAL ARM64) + SET(MSVC_ARM64 1) + SET(MSVC_INTEL 0) + ELSE() + SET(MSVC_INTEL 1) + ENDIF() +ENDIF() # avoid running system checks by using pre-cached check results # system checks are expensive on VS since every tiny program is to be compiled in diff --git a/cmake/ssl.cmake b/cmake/ssl.cmake index 646aa37a91c..51176a84c51 100644 --- a/cmake/ssl.cmake +++ b/cmake/ssl.cmake @@ -53,7 +53,7 @@ MACRO (MYSQL_USE_BUNDLED_SSL) ${CMAKE_SOURCE_DIR}/extra/wolfssl/wolfssl ${CMAKE_SOURCE_DIR}/extra/wolfssl/wolfssl/wolfssl ) - SET(SSL_LIBRARIES wolfssl wolfcrypt) + SET(SSL_LIBRARIES wolfssl) SET(SSL_INCLUDE_DIRS ${INC_DIRS}) SET(SSL_DEFINES "-DHAVE_OPENSSL -DHAVE_WOLFSSL -DWOLFSSL_USER_SETTINGS") SET(HAVE_ERR_remove_thread_state ON CACHE INTERNAL "wolfssl doesn't have ERR_remove_thread_state") diff --git a/extra/wolfssl/CMakeLists.txt b/extra/wolfssl/CMakeLists.txt index 69f7801d4e3..e3f8da21f76 100644 --- a/extra/wolfssl/CMakeLists.txt +++ b/extra/wolfssl/CMakeLists.txt @@ -1,86 +1,57 @@ -IF(MSVC) +IF(MSVC_INTEL) PROJECT(wolfssl C ASM_MASM) ELSEIF(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|amd64") - PROJECT(wolfssl C ASM) + PROJECT(wolfssl C ASM) ELSE() PROJECT(wolfssl C) ENDIF() IF(CMAKE_SIZEOF_VOID_P MATCHES 8) -IF(MSVC) +IF(MSVC_INTEL AND NOT (CMAKE_C_COMPILER_ID MATCHES Clang)) SET(WOLFSSL_INTELASM ON) - SET(WOLFSSL_X86_64_BUILD 1) SET(HAVE_INTEL_RDSEED 1) SET(HAVE_INTEL_RDRAND 1) -ELSEIF(CMAKE_ASM_COMPILER_ID MATCHES "Clang" AND CMAKE_VERSION VERSION_LESS 3.16) - - # WolfSSL 5.5.4 bug workaround below does not work, due to some CMake bug ELSEIF(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|amd64") - SET(WOLFSSL_X86_64_BUILD 1) IF(CMAKE_C_COMPILER_ID MATCHES GNU AND CMAKE_C_COMPILER_VERSION VERSION_LESS 4.9) MESSAGE_ONCE(NO_INTEL_ASSEMBLY "Disable Intel assembly for WolfSSL - compiler is too old") + ELSEIF(WITH_MSAN) + MESSAGE_ONCE(MSAN_CANT_HANDLE_IT "Disable Intel assembly for WolfSSL - MSAN can't handle it") ELSE() - IF(WITH_MSAN) - MESSAGE_ONCE(MSAN_CANT_HANDLE_IT - "Disable Intel assembly for WolfSSL - MSAN can't handle it") - ELSE() - MY_CHECK_C_COMPILER_FLAG(-maes) - MY_CHECK_C_COMPILER_FLAG(-msse4) - MY_CHECK_C_COMPILER_FLAG(-mpclmul) - IF(have_C__maes AND have_C__msse4 AND have_C__mpclmul) - SET(WOLFSSL_INTELASM ON) + MY_CHECK_C_COMPILER_FLAG(-maes) + MY_CHECK_C_COMPILER_FLAG(-msse4) + MY_CHECK_C_COMPILER_FLAG(-mpclmul) + IF(have_C__maes AND have_C__msse4 AND have_C__mpclmul) + SET(WOLFSSL_INTELASM ON) + MY_CHECK_C_COMPILER_FLAG(-mrdrnd) + MY_CHECK_C_COMPILER_FLAG(-mrdseed) + IF(have_C__mrdrnd) + SET(HAVE_INTEL_RDRAND ON) + ENDIF() + IF(have_C__mrdseed) + SET(HAVE_INTEL_RDSEED ON) ENDIF() - ENDIF() - MY_CHECK_C_COMPILER_FLAG(-mrdrnd) - MY_CHECK_C_COMPILER_FLAG(-mrdseed) - IF(have_C__mrdrnd) - SET(HAVE_INTEL_RDRAND ON) - ENDIF() - IF(have_C__mrdseed) - SET(HAVE_INTEL_RDSEED ON) ENDIF() ENDIF() ENDIF() ENDIF() SET(WOLFSSL_SRCDIR ${CMAKE_CURRENT_SOURCE_DIR}/wolfssl/src) -ADD_DEFINITIONS(${SSL_DEFINES}) - -SET(WOLFSSL_SOURCES - ${WOLFSSL_SRCDIR}/crl.c - ${WOLFSSL_SRCDIR}/internal.c - ${WOLFSSL_SRCDIR}/keys.c - ${WOLFSSL_SRCDIR}/tls.c - ${WOLFSSL_SRCDIR}/wolfio.c - ${WOLFSSL_SRCDIR}/ocsp.c - ${WOLFSSL_SRCDIR}/ssl.c - ${WOLFSSL_SRCDIR}/tls13.c) - -ADD_DEFINITIONS(-DWOLFSSL_LIB -DBUILDING_WOLFSSL) - -INCLUDE_DIRECTORIES(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}/wolfssl) -IF(MSVC) - # size_t to long truncation warning - SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -wd4267 -wd4334 -wd4028 -wd4244") -ENDIF() - -ADD_CONVENIENCE_LIBRARY(wolfssl ${WOLFSSL_SOURCES}) - -# Workaround linker crash with older Ubuntu binutils -# e.g aborting at ../../bfd/merge.c line 873 in _bfd_merged_section_offset -IF(CMAKE_SYSTEM_NAME MATCHES "Linux") - STRING(REPLACE "-g " "-g1 " CMAKE_C_FLAGS_RELWITHDEBINFO - ${CMAKE_C_FLAGS_RELWITHDEBINFO}) - STRING(REPLACE "-g " "-g1 " CMAKE_C_FLAGS_DEBUG - ${CMAKE_C_FLAGS_DEBUG}) - STRING(REPLACE "-ggdb3 " " " CMAKE_C_FLAGS_RELWITHDEBINFO - ${CMAKE_C_FLAGS_RELWITHDEBINFO}) - STRING(REPLACE "-ggdb3 " " " CMAKE_C_FLAGS_DEBUG - ${CMAKE_C_FLAGS_DEBUG}) -ENDIF() - SET(WOLFCRYPT_SRCDIR ${CMAKE_CURRENT_SOURCE_DIR}/wolfssl/wolfcrypt/src) -SET(WOLFCRYPT_SOURCES +ADD_DEFINITIONS(${SSL_DEFINES}) +ADD_DEFINITIONS(-DWOLFSSL_LIB -DBUILDING_WOLFSSL) +ADD_DEFINITIONS(-DWOLFSSL_SP_4096) +INCLUDE_DIRECTORIES(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}/wolfssl) +INCLUDE_DIRECTORIES(${SSL_INCLUDE_DIRS}) + +add_library(wolfssl STATIC +${WOLFSSL_SRCDIR}/crl.c +${WOLFSSL_SRCDIR}/internal.c +${WOLFSSL_SRCDIR}/keys.c +${WOLFSSL_SRCDIR}/tls.c +${WOLFSSL_SRCDIR}/wolfio.c +${WOLFSSL_SRCDIR}/ocsp.c +${WOLFSSL_SRCDIR}/ssl.c +${WOLFSSL_SRCDIR}/tls13.c ${WOLFCRYPT_SRCDIR}/aes.c ${WOLFCRYPT_SRCDIR}/arc4.c ${WOLFCRYPT_SRCDIR}/asn.c @@ -110,69 +81,56 @@ ${WOLFCRYPT_SRCDIR}/wc_encrypt.c ${WOLFCRYPT_SRCDIR}/hash.c ${WOLFCRYPT_SRCDIR}/wolfmath.c ${WOLFCRYPT_SRCDIR}/kdf.c +${WOLFCRYPT_SRCDIR}/sp_int.c +${WOLFCRYPT_SRCDIR}/sp_c32.c +${WOLFCRYPT_SRCDIR}/sp_c64.c ) -# Use fastmath large number math library. -IF(NOT (MSVC AND CMAKE_C_COMPILER_ID MATCHES Clang)) - # Can't use clang-cl with WOLFSSL_FASTMATH - # due to https://bugs.llvm.org/show_bug.cgi?id=25305 - SET(WOLFSSL_FASTMATH 1) -ENDIF() - -IF(WOLFSSL_FASTMATH) - SET(USE_FAST_MATH 1) - SET(TFM_TIMING_RESISTANT 1) - # FP_MAX_BITS is set high solely to satisfy ssl_8k_key.test - # WolfSSL will use more stack space with it - SET(FP_MAX_BITS 16384) - SET(WOLFCRYPT_SOURCES ${WOLFCRYPT_SOURCES} ${WOLFCRYPT_SRCDIR}/tfm.c) - IF((CMAKE_SIZEOF_VOID_P MATCHES 4) AND (CMAKE_SYSTEM_PROCESSOR MATCHES "86") - AND (NOT MSVC)) - # Workaround https://github.com/wolfSSL/wolfssl/issues/4245 - # On 32bit Intel, to satisfy inline assembly's wish for free registers - # 1. use -fomit-frame-pointer - # 2. With GCC 4, additionally use -fno-PIC, which works on x86 - # (modern GCC has PIC optimizations, that make it unnecessary) - # The following assumes GCC or Clang - SET(TFM_COMPILE_FLAGS "-fomit-frame-pointer") - IF(CMAKE_C_COMPILER_VERSION VERSION_LESS "5") - SET(TFM_COMPILE_FLAGS "${TFM_COMPILE_FLAGS} -fno-PIC") - ENDIF() - SET_SOURCE_FILES_PROPERTIES(${WOLFCRYPT_SRCDIR}/tfm.c - PROPERTIES COMPILE_FLAGS ${TFM_COMPILE_FLAGS}) - ENDIF() -ELSE() - SET(WOLFSSL_SP_MATH_ALL 1) - SET(WOLFCRYPT_SOURCES ${WOLFCRYPT_SOURCES} ${WOLFCRYPT_SRCDIR}/sp_int.c) -ENDIF() - -IF(WOLFSSL_X86_64_BUILD) - LIST(APPEND WOLFCRYPT_SOURCES ${WOLFCRYPT_SRCDIR}/cpuid.c) - IF(MSVC) - SET(WOLFSSL_AESNI 1) - LIST(APPEND WOLFCRYPT_SOURCES - ${WOLFCRYPT_SRCDIR}/aes_asm.asm - ${WOLFCRYPT_SRCDIR}/aes_gcm_asm.asm) - IF(CMAKE_C_COMPILER_ID MATCHES Clang) - SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -maes -msse4.2 -mpclmul -mrdrnd -mrdseed") - ENDIF() - ELSEIF(WOLFSSL_INTELASM) - SET(WOLFSSL_AESNI 1) - SET(USE_INTEL_SPEEDUP 1) - LIST(APPEND WOLFCRYPT_SOURCES +# Optimizations, assembly +if(WOLFSSL_INTELASM) + set(WOLFSSL_X86_64_BUILD 1) + set(WOLFSSL_SP_X86_64 1) + set(WOLFSSL_SP_X86_64_ASM 1) + set(WOLFSSL_AESNI 1) + target_sources(wolfssl PRIVATE + ${WOLFCRYPT_SRCDIR}/cpuid.c + ${WOLFCRYPT_SRCDIR}/sp_x86_64.c + ) + if(MSVC_INTEL) + target_sources(wolfssl PRIVATE + ${WOLFCRYPT_SRCDIR}/aes_asm.asm + ${WOLFCRYPT_SRCDIR}/aes_gcm_asm.asm + ${WOLFCRYPT_SRCDIR}/sp_x86_64_asm.asm + ) + target_compile_options(wolfssl PRIVATE + $<$:-maes -msse4.2 -mpclmul -mrdrnd -mrdseed> + $<$:/Zi> + ) + else() + set(USE_INTEL_SPEEDUP 1) + target_sources(wolfssl PRIVATE ${WOLFCRYPT_SRCDIR}/aes_asm.S ${WOLFCRYPT_SRCDIR}/aes_gcm_asm.S ${WOLFCRYPT_SRCDIR}/chacha_asm.S ${WOLFCRYPT_SRCDIR}/poly1305_asm.S ${WOLFCRYPT_SRCDIR}/sha512_asm.S - ${WOLFCRYPT_SRCDIR}/sha256_asm.S) - ADD_DEFINITIONS(-maes -msse4.2 -mpclmul) - # WolfSSL 5.5.4 bug - user_settings.h not included into aes_asm.S - SET_PROPERTY(SOURCE ${WOLFCRYPT_SRCDIR}/aes_asm.S APPEND PROPERTY COMPILE_OPTIONS "-DWOLFSSL_X86_64_BUILD") - ENDIF() -ENDIF() + ${WOLFCRYPT_SRCDIR}/sha256_asm.S + ${WOLFCRYPT_SRCDIR}/sp_x86_64_asm.S + ) + target_compile_options(wolfssl PRIVATE -maes -msse4.2 -mpclmul) + # Workaround 5.5.4 bug (user_settings.h not included into aes_asm.S) + set_property(SOURCE ${WOLFCRYPT_SRCDIR}/aes_asm.S APPEND PROPERTY COMPILE_OPTIONS "-DWOLFSSL_X86_64_BUILD") + endif() +endif() + +# Silence some warnings +if(MSVC) + # truncation warnings + target_compile_options(wolfssl PRIVATE $<$:/wd4244>) + if(CMAKE_C_COMPILER_ID MATCHES Clang) + target_compile_options(wolfssl PRIVATE $<$:-Wno-incompatible-function-pointer-types>) + endif() +endif() CONFIGURE_FILE(user_settings.h.in user_settings.h) -INCLUDE_DIRECTORIES(${SSL_INCLUDE_DIRS}) -ADD_CONVENIENCE_LIBRARY(wolfcrypt ${WOLFCRYPT_SOURCES}) diff --git a/extra/wolfssl/user_settings.h.in b/extra/wolfssl/user_settings.h.in index baa64fcdfbe..489118b33b4 100644 --- a/extra/wolfssl/user_settings.h.in +++ b/extra/wolfssl/user_settings.h.in @@ -21,6 +21,7 @@ #define HAVE_AESGCM #define HAVE_CHACHA #define HAVE_POLY1305 +#define HAVE_THREAD_LS #define WOLFSSL_AES_COUNTER #define NO_WOLFSSL_STUB #define OPENSSL_ALL @@ -51,20 +52,19 @@ #define NO_RABBIT #define NO_RC4 -/* - FP_MAX_BITS is set high solely to satisfy ssl_8k_key.test - WolfSSL will use more stack space with it, with fastmath -*/ -#cmakedefine FP_MAX_BITS 16384 #define RSA_MAX_SIZE 8192 +#define WOLFSSL_SP_MATH_ALL +#define WOLFSSL_HAVE_SP_RSA +#ifndef WOLFSSL_SP_4096 +#define WOLFSSL_SP_4096 +#endif + #cmakedefine WOLFSSL_AESNI -#cmakedefine USE_FAST_MATH -#cmakedefine TFM_TIMING_RESISTANT #cmakedefine HAVE_INTEL_RDSEED #cmakedefine HAVE_INTEL_RDRAND #cmakedefine USE_INTEL_SPEEDUP -#cmakedefine USE_FAST_MATH #cmakedefine WOLFSSL_X86_64_BUILD -#cmakedefine WOLFSSL_SP_MATH_ALL +#cmakedefine WOLFSSL_SP_X86_64 +#cmakedefine WOLFSSL_SP_X86_64_ASM #endif /* WOLFSSL_USER_SETTINGS_H */