From 70738cf726b813db9111bd0a6dab1e53a3c829f6 Mon Sep 17 00:00:00 2001 From: Allan Sandfeld Jensen Date: Tue, 8 Oct 2024 16:30:20 +0200 Subject: [PATCH] Fix runtime CPU feature detection for ARM We had no routines for checking compiler support instead only compiler currently enabled. Change-Id: I5543e4cff2b0bab494e11abf257061147baaf0d7 Reviewed-by: Thiago Macieira Reviewed-by: Alexandru Croitor --- cmake/QtCompilerOptimization.cmake | 1 + cmake/QtFeature.cmake | 25 ++++++++++++++++++ cmake/QtProcessConfigureArgs.cmake | 1 + config.tests/arch/arch.cpp | 2 +- config.tests/armintrin/CMakeLists.txt | 21 +++++++++++++++ config.tests/armintrin/main.cpp | 37 +++++++++++++++++++++++++++ configure.cmake | 12 ++++++--- src/corelib/global/qsimd_p.h | 10 +++++--- src/corelib/tools/qhash.cpp | 2 +- 9 files changed, 103 insertions(+), 8 deletions(-) create mode 100644 config.tests/armintrin/CMakeLists.txt create mode 100644 config.tests/armintrin/main.cpp diff --git a/cmake/QtCompilerOptimization.cmake b/cmake/QtCompilerOptimization.cmake index 3df4d777dce..e1e31893ab1 100644 --- a/cmake/QtCompilerOptimization.cmake +++ b/cmake/QtCompilerOptimization.cmake @@ -67,6 +67,7 @@ if(GCC OR CLANG OR QCC) set(QT_CFLAGS_NEON "${__prefix}-mfpu=neon") endif() set(QT_CFLAGS_ARM_SVE "${__prefix}-march=armv8-a+sve") + set(QT_CFLAGS_ARM_CRYPTO "${__prefix}-march=armv8-a+crypto") set(QT_CFLAGS_MIPS_DSP "${__prefix}-mdsp") set(QT_CFLAGS_MIPS_DSPR2 "${__prefix}-mdspr2") unset(__prefix) diff --git a/cmake/QtFeature.cmake b/cmake/QtFeature.cmake index 1e9e0abbae1..189cf0e2b55 100644 --- a/cmake/QtFeature.cmake +++ b/cmake/QtFeature.cmake @@ -1283,6 +1283,31 @@ function(qt_config_compile_test_x86simd extension label) set(TEST_subarch_${extension} "${TEST_X86SIMD_${extension}}" CACHE INTERNAL "${label}") endfunction() +function(qt_config_compile_test_armintrin extension label) + if (DEFINED TEST_ARMINTRIN_${extension}) + return() + endif() + + set(flags "-DSIMD:string=${extension}") + + qt_get_platform_try_compile_vars(platform_try_compile_vars) + list(APPEND flags ${platform_try_compile_vars}) + + message(STATUS "Performing Test ${label} intrinsics") + try_compile("TEST_ARMINTRIN_${extension}" + "${CMAKE_CURRENT_BINARY_DIR}/config.tests/armintrin_${extension}" + "${CMAKE_CURRENT_SOURCE_DIR}/config.tests/armintrin" + armintrin + CMAKE_FLAGS ${flags}) + if(${TEST_ARMINTRIN_${extension}}) + set(status_label "Success") + else() + set(status_label "Failed") + endif() + message(STATUS "Performing Test ${label} intrinsics - ${status_label}") + set(TEST_subarch_${extension} "${TEST_ARMINTRIN_${extension}}" CACHE INTERNAL "${label}") +endfunction() + function(qt_config_compile_test_machine_tuple label) if(DEFINED TEST_MACHINE_TUPLE OR NOT (LINUX OR HURD) OR ANDROID) return() diff --git a/cmake/QtProcessConfigureArgs.cmake b/cmake/QtProcessConfigureArgs.cmake index 86c6d4b09b7..dca6b7cbb81 100644 --- a/cmake/QtProcessConfigureArgs.cmake +++ b/cmake/QtProcessConfigureArgs.cmake @@ -244,6 +244,7 @@ endmacro() defstub(qt_add_qmake_lib_dependency) defstub(qt_config_compile_test) +defstub(qt_config_compile_test_armintrin) defstub(qt_config_compile_test_machine_tuple) defstub(qt_config_compile_test_x86simd) defstub(qt_config_compiler_supports_flag_test) diff --git a/config.tests/arch/arch.cpp b/config.tests/arch/arch.cpp index 58595f0a2ec..80995df11ea 100644 --- a/config.tests/arch/arch.cpp +++ b/config.tests/arch/arch.cpp @@ -248,7 +248,7 @@ const char msg2[] = "==Qt=magic=Qt== Sub-architecture:" #if defined(__ARM_FEATURE_CRC32) || (defined(_M_ARM64) && __ARM_ARCH >= 800) " crc32" #endif -#if defined(__ARM_FEATURE_CRYPTO) || (defined(_M_ARM64) && __ARM_ARCH >= 800) +#if defined(__ARM_FEATURE_AES) || defined(__ARM_FEATURE_CRYPTO) || (defined(_M_ARM64) && __ARM_ARCH >= 800) " crypto" #endif #ifdef __ARM_FEATURE_SVE diff --git a/config.tests/armintrin/CMakeLists.txt b/config.tests/armintrin/CMakeLists.txt new file mode 100644 index 00000000000..6f8bdaf63e5 --- /dev/null +++ b/config.tests/armintrin/CMakeLists.txt @@ -0,0 +1,21 @@ +# Copyright (C) 2024 The Qt Company Ltd. +# SPDX-License-Identifier: BSD-3-Clause + +cmake_minimum_required(VERSION 3.16) +project(armintrin LANGUAGES CXX) + +include(../../cmake/QtPlatformSupport.cmake) +include(../../cmake/QtCompilerOptimization.cmake) + +string(TOUPPER "${SIMD}" upper_simd) + +if(NOT DEFINED "QT_CFLAGS_ARM_${upper_simd}") + # Don't use CMake error() because a configure error also fails the try_compile() call. + # Instead use a compile flag that doesn't exist to force a compiler error. + set(QT_CFLAGS_${upper_simd} "--qt-cflags-not-found") +endif() + +add_executable("SimdTest${SIMD}") +target_sources("SimdTest${SIMD}" PRIVATE main.cpp) +target_compile_options("SimdTest${SIMD}" PRIVATE ${QT_CFLAGS_ARM_${upper_simd}}) +target_compile_definitions("SimdTest${SIMD}" PRIVATE QT_COMPILER_SUPPORTS_${upper_simd}) diff --git a/config.tests/armintrin/main.cpp b/config.tests/armintrin/main.cpp new file mode 100644 index 00000000000..1d37693a954 --- /dev/null +++ b/config.tests/armintrin/main.cpp @@ -0,0 +1,37 @@ +// Copyright (C) 2024 +// SPDX-License-Identifier: BSD-3-Clause + +#if defined QT_COMPILER_SUPPORTS_CRYPTO +#include +void aeshash(uint8x16_t &state0, uint8x16_t data) +{ + auto state1 = state0; + state0 = vaeseq_u8(state0, data); + state0 = vaesmcq_u8(state0); + auto state2 = state0; + state0 = vaeseq_u8(state0, state1); + state0 = vaesmcq_u8(state0); + auto state3 = state0; + state0 = vaeseq_u8(state0, state2); + state0 = vaesmcq_u8(state0); + state0 = veorq_u8(state0, state3); +} +#elif defined QT_COMPILER_SUPPORTS_SVE +#include +void qt_memfill64_sve(uint64_t *dest, uint64_t color, int64_t count) +{ + int64_t i = 0; + const svuint64_t vdup = svdup_n_u64(color); + svbool_t pg = svwhilelt_b64_s64(i, count); + do { + svst1_u64(pg, (uint64_t*)(dest + i), vdup); + i += svcntd(); + pg = svwhilelt_b64_s64(i, count); + } while (svptest_any(svptrue_b64(), pg)); +} +#endif + +int main(int argc, char **argv) +{ + return 0; +} diff --git a/configure.cmake b/configure.cmake index c748de32ed6..df72f1e2610 100644 --- a/configure.cmake +++ b/configure.cmake @@ -370,6 +370,12 @@ qt_config_compile_test_x86simd(avx512vbmi2 "AVX512VBMI2") # x86: vaes qt_config_compile_test_x86simd(vaes "VAES") +# arm: crypto +qt_config_compile_test_armintrin(crypto "CRYPTO") + +# arm: sve +qt_config_compile_test_armintrin(sve "SVE") + # localtime_r qt_config_compile_test(localtime_r LABEL "localtime_r()" @@ -923,7 +929,7 @@ qt_feature_definition("arm_crc32" "QT_COMPILER_SUPPORTS_CRC32" VALUE "1") qt_feature_config("arm_crc32" QMAKE_PRIVATE_CONFIG) qt_feature("arm_crypto" PRIVATE LABEL "AES" - CONDITION ( ( TEST_architecture_arch STREQUAL arm ) OR ( TEST_architecture_arch STREQUAL arm64 ) ) AND TEST_arch_${TEST_architecture_arch}_subarch_crypto + CONDITION ( ( TEST_architecture_arch STREQUAL arm ) OR ( TEST_architecture_arch STREQUAL arm64 ) ) AND ( TEST_arch_${TEST_architecture_arch}_subarch_crypto OR TEST_subarch_crypto ) ) qt_feature_definition("arm_crypto" "QT_COMPILER_SUPPORTS_CRYPTO" VALUE "1") qt_feature_definition("arm_crypto" "QT_COMPILER_SUPPORTS_AES" VALUE "1") @@ -931,7 +937,7 @@ qt_feature_config("arm_crypto" QMAKE_PRIVATE_CONFIG) qt_feature("arm_sve" PRIVATE LABEL "SVE" - CONDITION ( TEST_architecture_arch STREQUAL arm64 ) AND TEST_arch_${TEST_architecture_arch}_subarch_sve + CONDITION ( TEST_architecture_arch STREQUAL arm64 ) AND ( TEST_arch_${TEST_architecture_arch}_subarch_sve OR TEST_subarch_sve ) ) qt_feature_definition("arm_sve" "QT_COMPILER_SUPPORTS_SVE" VALUE "1") qt_feature_config("arm_sve" QMAKE_PRIVATE_CONFIG) @@ -1291,7 +1297,7 @@ qt_configure_add_summary_entry( ) qt_configure_add_summary_entry( TYPE "featureList" - ARGS "neon arm_crc32 arm_crypto" + ARGS "neon arm_crc32 arm_crypto arm_sve" MESSAGE "ARM Extensions" CONDITION ( TEST_architecture_arch STREQUAL arm ) OR ( TEST_architecture_arch STREQUAL arm64 ) ) diff --git a/src/corelib/global/qsimd_p.h b/src/corelib/global/qsimd_p.h index 99786d92a0c..86c23418fc6 100644 --- a/src/corelib/global/qsimd_p.h +++ b/src/corelib/global/qsimd_p.h @@ -99,7 +99,7 @@ QT_WARNING_DISABLE_INTEL(103) #define QT_COMPILER_SUPPORTS(x) (QT_COMPILER_SUPPORTS_ ## x - 0) -#if defined(Q_PROCESSOR_ARM) +#if defined(Q_PROCESSOR_ARM_64) # define QT_COMPILER_SUPPORTS_HERE(x) ((__ARM_FEATURE_ ## x) || (__ ## x ## __) || QT_COMPILER_SUPPORTS(x)) # if defined(Q_CC_GNU) /* GCC requires attributes for a function */ @@ -107,6 +107,10 @@ QT_WARNING_DISABLE_INTEL(103) # else # define QT_FUNCTION_TARGET(x) # endif +#elif defined(Q_PROCESSOR_ARM_32) + /* We do not support for runtime CPU feature switching on ARM32 */ +# define QT_COMPILER_SUPPORTS_HERE(x) ((__ARM_FEATURE_ ## x) || (__ ## x ## __)) +# define QT_FUNCTION_TARGET(x) #elif defined(Q_PROCESSOR_MIPS) # define QT_COMPILER_SUPPORTS_HERE(x) (__ ## x ## __) # define QT_FUNCTION_TARGET(x) @@ -345,7 +349,7 @@ inline uint32x4_t qvsetq_n_u32(uint32_t a, uint32_t b, uint32_t c, uint32_t d) #if defined(Q_PROCESSOR_ARM_64) #if defined(Q_CC_CLANG) -#define QT_FUNCTION_TARGET_STRING_AES "crypto" +#define QT_FUNCTION_TARGET_STRING_AES "aes" #define QT_FUNCTION_TARGET_STRING_CRC32 "crc" #define QT_FUNCTION_TARGET_STRING_SVE "sve" #elif defined(Q_CC_GNU) @@ -395,7 +399,7 @@ static const uint64_t qCompilerCpuFeatures = 0 #if defined __ARM_FEATURE_CRC32 | CpuFeatureCRC32 #endif -#if defined __ARM_FEATURE_CRYPTO +#if defined (__ARM_FEATURE_CRYPTO) || defined(__ARM_FEATURE_AES) | CpuFeatureAES #endif #endif // Q_OS_LINUX && Q_PROCESSOR_ARM64 diff --git a/src/corelib/tools/qhash.cpp b/src/corelib/tools/qhash.cpp index 6db4314b5ca..56245dcd355 100644 --- a/src/corelib/tools/qhash.cpp +++ b/src/corelib/tools/qhash.cpp @@ -732,7 +732,7 @@ static size_t aeshash(const uchar *p, size_t len, size_t seed, size_t seed2) noe // Compared to x86 AES, ARM splits each round into two instructions // and includes the pre-xor instead of the post-xor. - const auto hash16bytes = [](uint8x16_t &state0, uint8x16_t data) { + const auto hash16bytes = [](uint8x16_t &state0, uint8x16_t data) QT_FUNCTION_TARGET(AES) { auto state1 = state0; state0 = vaeseq_u8(state0, data); state0 = vaesmcq_u8(state0);