From 232d7a5e2dc50181294b927e2bc4b02e282725a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Mon, 29 Jul 2024 10:58:09 +0300 Subject: [PATCH] MDEV-34565: SIGILL due to OS not supporting AVX512 It is not sufficient to check that the CPU supports the necessary instructions. Also the operating system (or virtual machine hypervisor) must enable all the AVX registers to be saved and restored on a context switch. Because clang 8 does not support the compiler intrinsic _xgetbv() we will require clang 9 or later for enabling the use of VPCLMULQDQ and the related AVX512 features. --- mysys/crc32/crc32c_x86.cc | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/mysys/crc32/crc32c_x86.cc b/mysys/crc32/crc32c_x86.cc index 0a4fd9db812..3ddddf1303c 100644 --- a/mysys/crc32/crc32c_x86.cc +++ b/mysys/crc32/crc32c_x86.cc @@ -28,7 +28,8 @@ # elif __GNUC__ >= 14 || (defined __clang_major__ && __clang_major__ >= 18) # define TARGET "pclmul,evex512,avx512f,avx512dq,avx512bw,avx512vl,vpclmulqdq" # define USE_VPCLMULQDQ __attribute__((target(TARGET))) -# elif __GNUC__ >= 11 || (defined __clang_major__ && __clang_major__ >= 8) +# elif __GNUC__ >= 11 || (defined __clang_major__ && __clang_major__ >= 9) +/* clang 8 does not support _xgetbv(), which we also need */ # define TARGET "pclmul,avx512f,avx512dq,avx512bw,avx512vl,vpclmulqdq" # define USE_VPCLMULQDQ __attribute__((target(TARGET))) # endif @@ -38,6 +39,7 @@ extern "C" unsigned crc32c_sse42(unsigned crc, const void* buf, size_t size); constexpr uint32_t cpuid_ecx_SSE42= 1U << 20; constexpr uint32_t cpuid_ecx_SSE42_AND_PCLMUL= cpuid_ecx_SSE42 | 1U << 1; +constexpr uint32_t cpuid_ecx_XSAVE= 1U << 26; static uint32_t cpuid_ecx() { @@ -382,8 +384,19 @@ static unsigned crc32_avx512(unsigned crc, const char *buf, size_t size, } } -static ATTRIBUTE_NOINLINE int have_vpclmulqdq() +#ifdef __GNUC__ +__attribute__((target("xsave"))) +#endif +static bool os_have_avx512() { + // The following flags must be set: SSE, AVX, OPMASK, ZMM_HI256, HI16_ZMM + return !(~_xgetbv(0 /*_XCR_XFEATURE_ENABLED_MASK*/) & 0xe6); +} + +static ATTRIBUTE_NOINLINE bool have_vpclmulqdq(uint32_t cpuid_ecx) +{ + if (!(cpuid_ecx & cpuid_ecx_XSAVE) || !os_have_avx512()) + return false; # ifdef _MSC_VER int regs[4]; __cpuidex(regs, 7, 0); @@ -410,10 +423,11 @@ static unsigned crc32c_vpclmulqdq(unsigned crc, const void *buf, size_t size) extern "C" my_crc32_t crc32_pclmul_enabled(void) { - if (~cpuid_ecx() & cpuid_ecx_SSE42_AND_PCLMUL) + const uint32_t ecx= cpuid_ecx(); + if (~ecx & cpuid_ecx_SSE42_AND_PCLMUL) return nullptr; #ifdef USE_VPCLMULQDQ - if (have_vpclmulqdq()) + if (have_vpclmulqdq(ecx)) return crc32_vpclmulqdq; #endif return crc32_pclmul; @@ -421,19 +435,20 @@ extern "C" my_crc32_t crc32_pclmul_enabled(void) extern "C" my_crc32_t crc32c_x86_available(void) { + const uint32_t ecx= cpuid_ecx(); #ifdef USE_VPCLMULQDQ - if (have_vpclmulqdq()) + if (have_vpclmulqdq(ecx)) return crc32c_vpclmulqdq; #endif #if SIZEOF_SIZE_T == 8 - switch (cpuid_ecx() & cpuid_ecx_SSE42_AND_PCLMUL) { + switch (ecx & cpuid_ecx_SSE42_AND_PCLMUL) { case cpuid_ecx_SSE42_AND_PCLMUL: return crc32c_3way; case cpuid_ecx_SSE42: return crc32c_sse42; } #else - if (cpuid_ecx() & cpuid_ecx_SSE42) + if (ecx & cpuid_ecx_SSE42) return crc32c_sse42; #endif return nullptr;