diff --git a/src/corelib/global/qsimd.cpp b/src/corelib/global/qsimd.cpp index eafad7bb0d3..72fcf630948 100644 --- a/src/corelib/global/qsimd.cpp +++ b/src/corelib/global/qsimd.cpp @@ -350,22 +350,6 @@ static void xgetbv(uint in, uint &eax, uint &edx) #endif } -// Flags from the XCR0 state register -enum XCR0Flags { - X87 = 1 << 0, - XMM0_15 = 1 << 1, - YMM0_15Hi128 = 1 << 2, - BNDRegs = 1 << 3, - BNDCSR = 1 << 4, - OpMask = 1 << 5, - ZMM0_15Hi256 = 1 << 6, - ZMM16_31 = 1 << 7, - - SSEState = XMM0_15, - AVXState = XMM0_15 | YMM0_15Hi128, - AVX512State = AVXState | OpMask | ZMM0_15Hi256 | ZMM16_31 -}; - QT_FUNCTION_TARGET_BASELINE static quint64 adjustedXcr0(quint64 xcr0) { @@ -386,7 +370,7 @@ static quint64 adjustedXcr0(quint64 xcr0) constexpr quintptr cpu_capabilities64 = commpage + 0x10; quint64 capab = *reinterpret_cast<quint64 *>(cpu_capabilities64); if (capab & kHasAVX512F) - xcr0 |= AVX512State; + xcr0 |= XSave_Avx512State; #endif return xcr0; @@ -395,9 +379,6 @@ static quint64 adjustedXcr0(quint64 xcr0) QT_FUNCTION_TARGET_BASELINE static quint64 detectProcessorFeatures() { - static const quint64 AllAVX = AllAVX512 | CpuFeatureAVX | CpuFeatureAVX2 | CpuFeatureF16C - | CpuFeatureFMA | CpuFeatureVAES; - quint64 features = 0; int cpuidLevel = maxBasicCpuidSupported(); #if Q_PROCESSOR_X86 < 5 @@ -408,38 +389,35 @@ static quint64 detectProcessorFeatures() #endif uint results[X86CpuidMaxLeaf] = {}; - cpuidFeatures01(results[Leaf1ECX], results[Leaf1EDX]); + cpuidFeatures01(results[Leaf01ECX], results[Leaf01EDX]); if (cpuidLevel >= 7) - cpuidFeatures07_00(results[Leaf7_0EBX], results[Leaf7_0ECX], results[Leaf7_0EDX]); + cpuidFeatures07_00(results[Leaf07_00EBX], results[Leaf07_00ECX], results[Leaf07_00EDX]); // populate our feature list - for (uint i = 0; i < sizeof(x86_locators) / sizeof(x86_locators[0]); ++i) { + for (uint i = 0; i < std::size(x86_locators); ++i) { uint word = x86_locators[i] / 32; uint bit = 1U << (x86_locators[i] % 32); - quint64 feature = Q_UINT64_C(1) << 
(i + 1); + quint64 feature = Q_UINT64_C(1) << i; if (results[word] & bit) features |= feature; } // now check the AVX state quint64 xcr0 = 0; - if (results[Leaf1ECX] & (1u << 27)) { + if (results[Leaf01ECX] & (1u << 27)) { // XGETBV enabled uint xgetbvA = 0, xgetbvD = 0; xgetbv(0, xgetbvA, xgetbvD); xcr0 = xgetbvA; - if (sizeof(XCR0Flags) > sizeof(xgetbvA)) + if (sizeof(XSaveBits) > sizeof(xgetbvA)) xcr0 |= quint64(xgetbvD) << 32; xcr0 = adjustedXcr0(xcr0); } - if ((xcr0 & AVXState) != AVXState) { - // support for YMM registers is disabled, disable all AVX - features &= ~AllAVX; - } else if ((xcr0 & AVX512State) != AVX512State) { - // support for ZMM registers or mask registers is disabled, disable all AVX512 - features &= ~AllAVX512; + for (auto req : xsave_requirements) { + if ((xcr0 & req.xsave_state) != req.xsave_state) + features &= ~req.cpu_features; } if (features & CpuFeatureRDRND && !checkRdrndWorks()) diff --git a/src/corelib/global/qsimd_p.h b/src/corelib/global/qsimd_p.h index 775aa2c6cb2..8f71b34feae 100644 --- a/src/corelib/global/qsimd_p.h +++ b/src/corelib/global/qsimd_p.h @@ -246,10 +246,12 @@ asm( # define __haswell__ 1 # endif +QT_BEGIN_NAMESPACE +static const quint64 qCompilerCpuFeatures = _compilerCpuFeatures; + // This constant does not include all CPU features found in a Haswell, only // those that we'd have optimized code for. // Note: must use Q_CONSTEXPR here, as this file may be compiled in C mode. -QT_BEGIN_NAMESPACE static const quint64 CpuFeatureArchHaswell = 0 | CpuFeatureSSE2 | CpuFeatureSSE3 diff --git a/src/corelib/global/qsimd_x86.cpp b/src/corelib/global/qsimd_x86.cpp index be17f44c095..f51536259e2 100644 --- a/src/corelib/global/qsimd_x86.cpp +++ b/src/corelib/global/qsimd_x86.cpp @@ -1,6 +1,6 @@ /**************************************************************************** ** -** Copyright (C) 2018 Intel Corporation. +** Copyright (C) 2022 Intel Corporation. 
** Contact: https://www.qt.io/licensing/ ** ** This file is part of the QtCore module of the Qt Toolkit. @@ -37,9 +37,7 @@ ** ****************************************************************************/ -// This is a generated file. DO NOT EDIT. -// Please see util/x86simdgen/generate.pl -#include "qsimd_p.h" +#include "qsimd_x86_p.h" static const char features_string[] = " sse2\0" @@ -55,101 +53,188 @@ static const char features_string[] = " f16c\0" " rdrnd\0" " bmi\0" - " hle\0" " avx2\0" " bmi2\0" - " rtm\0" " avx512f\0" " avx512dq\0" " rdseed\0" " avx512ifma\0" - " avx512pf\0" - " avx512er\0" " avx512cd\0" " sha\0" " avx512bw\0" " avx512vl\0" " avx512vbmi\0" " avx512vbmi2\0" + " shstk\0" " gfni\0" " vaes\0" " avx512vnni\0" " avx512bitalg\0" " avx512vpopcntdq\0" - " avx5124nniw\0" - " avx5124fmaps\0" + " hybrid\0" + " ibt\0" + " avx512fp16\0" "\0"; -static const quint16 features_indices[] = { - 306, 0, 6, 12, 19, 24, 32, 40, - 47, 55, 60, 65, 71, 78, 83, 88, - 94, 100, 105, 114, 124, 132, 144, 154, - 164, 174, 179, 189, 199, 211, 224, 230, - 236, 248, 262, 279, 292 +static const uint16_t features_indices[] = { + 0, 6, 12, 19, 24, 32, 40, 47, + 55, 60, 65, 71, 78, 83, 89, 95, + 104, 114, 122, 134, 144, 149, 159, 169, + 181, 194, 201, 207, 213, 225, 239, 256, + 264, 269, }; enum X86CpuidLeaves { - Leaf1ECX, - Leaf1EDX, - Leaf7_0EBX, - Leaf7_0ECX, - Leaf7_0EDX, + Leaf01EDX, + Leaf01ECX, + Leaf07_00EBX, + Leaf07_00ECX, + Leaf07_00EDX, + Leaf07_01EAX, + Leaf13_01EAX, + Leaf80000001hECX, + Leaf80000008hEBX, X86CpuidMaxLeaf }; -static const quint8 x86_locators[] = { - Leaf1EDX*32 + 26, // sse2 - Leaf1ECX*32 + 0, // sse3 - Leaf1ECX*32 + 9, // ssse3 - Leaf1ECX*32 + 12, // fma - Leaf1ECX*32 + 19, // sse4.1 - Leaf1ECX*32 + 20, // sse4.2 - Leaf1ECX*32 + 22, // movbe - Leaf1ECX*32 + 23, // popcnt - Leaf1ECX*32 + 25, // aes - Leaf1ECX*32 + 28, // avx - Leaf1ECX*32 + 29, // f16c - Leaf1ECX*32 + 30, // rdrnd - Leaf7_0EBX*32 + 3, // bmi - Leaf7_0EBX*32 + 4, // hle - 
Leaf7_0EBX*32 + 5, // avx2 - Leaf7_0EBX*32 + 8, // bmi2 - Leaf7_0EBX*32 + 11, // rtm - Leaf7_0EBX*32 + 16, // avx512f - Leaf7_0EBX*32 + 17, // avx512dq - Leaf7_0EBX*32 + 18, // rdseed - Leaf7_0EBX*32 + 21, // avx512ifma - Leaf7_0EBX*32 + 26, // avx512pf - Leaf7_0EBX*32 + 27, // avx512er - Leaf7_0EBX*32 + 28, // avx512cd - Leaf7_0EBX*32 + 29, // sha - Leaf7_0EBX*32 + 30, // avx512bw - Leaf7_0EBX*32 + 31, // avx512vl - Leaf7_0ECX*32 + 1, // avx512vbmi - Leaf7_0ECX*32 + 6, // avx512vbmi2 - Leaf7_0ECX*32 + 8, // gfni - Leaf7_0ECX*32 + 9, // vaes - Leaf7_0ECX*32 + 11, // avx512vnni - Leaf7_0ECX*32 + 12, // avx512bitalg - Leaf7_0ECX*32 + 14, // avx512vpopcntdq - Leaf7_0EDX*32 + 2, // avx5124nniw - Leaf7_0EDX*32 + 3 // avx5124fmaps +static const uint16_t x86_locators[] = { + Leaf01EDX*32 + 26, // sse2 + Leaf01ECX*32 + 0, // sse3 + Leaf01ECX*32 + 9, // ssse3 + Leaf01ECX*32 + 12, // fma + Leaf01ECX*32 + 19, // sse4.1 + Leaf01ECX*32 + 20, // sse4.2 + Leaf01ECX*32 + 22, // movbe + Leaf01ECX*32 + 23, // popcnt + Leaf01ECX*32 + 25, // aes + Leaf01ECX*32 + 28, // avx + Leaf01ECX*32 + 29, // f16c + Leaf01ECX*32 + 30, // rdrnd + Leaf07_00EBX*32 + 3, // bmi + Leaf07_00EBX*32 + 5, // avx2 + Leaf07_00EBX*32 + 8, // bmi2 + Leaf07_00EBX*32 + 16, // avx512f + Leaf07_00EBX*32 + 17, // avx512dq + Leaf07_00EBX*32 + 18, // rdseed + Leaf07_00EBX*32 + 21, // avx512ifma + Leaf07_00EBX*32 + 28, // avx512cd + Leaf07_00EBX*32 + 29, // sha + Leaf07_00EBX*32 + 30, // avx512bw + Leaf07_00EBX*32 + 31, // avx512vl + Leaf07_00ECX*32 + 1, // avx512vbmi + Leaf07_00ECX*32 + 6, // avx512vbmi2 + Leaf07_00ECX*32 + 7, // shstk + Leaf07_00ECX*32 + 8, // gfni + Leaf07_00ECX*32 + 9, // vaes + Leaf07_00ECX*32 + 11, // avx512vnni + Leaf07_00ECX*32 + 12, // avx512bitalg + Leaf07_00ECX*32 + 14, // avx512vpopcntdq + Leaf07_00EDX*32 + 15, // hybrid + Leaf07_00EDX*32 + 20, // ibt + Leaf07_00EDX*32 + 23, // avx512fp16 +}; + +struct X86Architecture +{ + uint64_t features; + char name[17 + 1]; +}; + +static const struct 
X86Architecture x86_architectures[] = { + { cpu_sapphirerapids, "Sapphire Rapids" }, + { cpu_tigerlake, "Tiger Lake" }, + { cpu_icelake_server, "Ice Lake (Server)" }, + { cpu_icelake_client, "Ice Lake (Client)" }, + { cpu_alderlake, "Alder Lake" }, + { cpu_cooperlake, "Cooper Lake" }, + { cpu_cannonlake, "Cannon Lake" }, + { cpu_cascadelake, "Cascade Lake" }, + { cpu_skylake_avx512, "Skylake (Avx512)" }, + { cpu_skylake, "Skylake" }, + { cpu_tremont, "Tremont" }, + { cpu_broadwell, "Broadwell" }, + { cpu_haswell, "Haswell" }, + { cpu_goldmont, "Goldmont" }, + { cpu_ivybridge, "Ivy Bridge" }, + { cpu_silvermont, "Silvermont" }, + { cpu_sandybridge, "Sandy Bridge" }, + { cpu_westmere, "Westmere" }, + { cpu_core2, "Core2" }, +}; + +enum XSaveBits { + XSave_X87 = 0x0001, // X87 and MMX state + XSave_SseState = 0x0002, // SSE: 128 bits of XMM registers + XSave_Ymm_Hi128 = 0x0004, // AVX: high 128 bits in YMM registers + XSave_Bndregs = 0x0008, // Memory Protection Extensions + XSave_Bndcsr = 0x0010, // Memory Protection Extensions + XSave_OpMask = 0x0020, // AVX512: k0 through k7 + XSave_Zmm_Hi256 = 0x0040, // AVX512: high 256 bits of ZMM0-15 + XSave_Hi16_Zmm = 0x0080, // AVX512: all 512 bits of ZMM16-31 + XSave_PTState = 0x0100, // Processor Trace + XSave_PKRUState = 0x0200, // Protection Key + XSave_CetUState = 0x0800, // CET: user mode + XSave_CetSState = 0x1000, // CET: supervisor mode + XSave_HdcState = 0x2000, // Hardware Duty Cycle + XSave_UintrState = 0x4000, // User Interrupts + XSave_HwpState = 0x10000, // Hardware P-State + XSave_Xtilecfg = 0x20000, // AMX: XTILECFG register + XSave_Xtiledata = 0x40000, // AMX: data in the tiles + XSave_AvxState = XSave_SseState | XSave_Ymm_Hi128, + XSave_MPXState = XSave_Bndregs | XSave_Bndcsr, + XSave_Avx512State = XSave_AvxState | XSave_OpMask | XSave_Zmm_Hi256 | XSave_Hi16_Zmm, + XSave_CetState = XSave_CetUState | XSave_CetSState, + XSave_AmxState = XSave_Xtilecfg | XSave_Xtiledata, +}; + +// List of features requiring 
XSave_AvxState +static const uint64_t XSaveReq_AvxState = 0 + | cpu_feature_fma + | cpu_feature_avx + | cpu_feature_f16c + | cpu_feature_avx2 + | cpu_feature_avx512f + | cpu_feature_avx512dq + | cpu_feature_avx512ifma + | cpu_feature_avx512cd + | cpu_feature_avx512bw + | cpu_feature_avx512vl + | cpu_feature_avx512vbmi + | cpu_feature_avx512vbmi2 + | cpu_feature_vaes + | cpu_feature_avx512vnni + | cpu_feature_avx512bitalg + | cpu_feature_avx512vpopcntdq + | cpu_feature_avx512fp16; + +// List of features requiring XSave_Avx512State +static const uint64_t XSaveReq_Avx512State = 0 + | cpu_feature_avx512f + | cpu_feature_avx512dq + | cpu_feature_avx512ifma + | cpu_feature_avx512cd + | cpu_feature_avx512bw + | cpu_feature_avx512vl + | cpu_feature_avx512vbmi + | cpu_feature_avx512vbmi2 + | cpu_feature_avx512vnni + | cpu_feature_avx512bitalg + | cpu_feature_avx512vpopcntdq + | cpu_feature_avx512fp16; + +// List of features requiring XSave_CetState +static const uint64_t XSaveReq_CetState = 0 + | cpu_feature_shstk; + +struct XSaveRequirementMapping +{ + uint64_t cpu_features; + uint64_t xsave_state; +}; + +static const struct XSaveRequirementMapping xsave_requirements[] = { + { XSaveReq_AvxState, XSave_AvxState }, + { XSaveReq_Avx512State, XSave_Avx512State }, + { XSaveReq_CetState, XSave_CetState }, }; -// List of AVX512 features (see detectProcessorFeatures()) -static const quint64 AllAVX512 = 0 - | CpuFeatureAVX512F - | CpuFeatureAVX512DQ - | CpuFeatureAVX512IFMA - | CpuFeatureAVX512PF - | CpuFeatureAVX512ER - | CpuFeatureAVX512CD - | CpuFeatureAVX512BW - | CpuFeatureAVX512VL - | CpuFeatureAVX512VBMI - | CpuFeatureAVX512VBMI2 - | CpuFeatureAVX512VNNI - | CpuFeatureAVX512BITALG - | CpuFeatureAVX512VPOPCNTDQ - | CpuFeatureAVX5124NNIW - | CpuFeatureAVX5124FMAPS; diff --git a/src/corelib/global/qsimd_x86_p.h b/src/corelib/global/qsimd_x86_p.h index 82e3008a24c..19cc55a3d0a 100644 --- a/src/corelib/global/qsimd_x86_p.h +++ b/src/corelib/global/qsimd_x86_p.h @@ -1,6 +1,6 @@ 
/**************************************************************************** ** -** Copyright (C) 2018 Intel Corporation. +** Copyright (C) 2022 Intel Corporation. ** Contact: https://www.qt.io/licensing/ ** ** This file is part of the QtCore module of the Qt Toolkit. @@ -38,224 +38,358 @@ ****************************************************************************/ // This is a generated file. DO NOT EDIT. -// Please see util/x86simdgen/generate.pl -#ifndef QSIMD_P_H -# error "Please include <private/qsimd_p.h> instead" -#endif +// Please see 3rdparty/x86simd_generate.pl #ifndef QSIMD_X86_P_H #define QSIMD_X86_P_H -#include "qsimd_p.h" - -// -// W A R N I N G -// ------------- -// -// This file is not part of the Qt API. It exists purely as an -// implementation detail. This header file may change from version to -// version without notice, or even be removed. -// -// We mean it. -// - -QT_BEGIN_NAMESPACE - -// used only to indicate that the CPU detection was initialized -#define QSimdInitialized (Q_UINT64_C(1) << 0) +#include <stdint.h> // in CPUID Leaf 1, EDX: -#define CpuFeatureSSE2 (Q_UINT64_C(1) << 1) +#define cpu_feature_sse2 (UINT64_C(1) << 0) #define QT_FUNCTION_TARGET_STRING_SSE2 "sse2" // in CPUID Leaf 1, ECX: -#define CpuFeatureSSE3 (Q_UINT64_C(1) << 2) +#define cpu_feature_sse3 (UINT64_C(1) << 1) #define QT_FUNCTION_TARGET_STRING_SSE3 "sse3" -#define CpuFeatureSSSE3 (Q_UINT64_C(1) << 3) +#define cpu_feature_ssse3 (UINT64_C(1) << 2) #define QT_FUNCTION_TARGET_STRING_SSSE3 "ssse3" -#define CpuFeatureFMA (Q_UINT64_C(1) << 4) +#define cpu_feature_fma (UINT64_C(1) << 3) #define QT_FUNCTION_TARGET_STRING_FMA "fma" -#define CpuFeatureSSE4_1 (Q_UINT64_C(1) << 5) +#define cpu_feature_sse4_1 (UINT64_C(1) << 4) #define QT_FUNCTION_TARGET_STRING_SSE4_1 "sse4.1" -#define CpuFeatureSSE4_2 (Q_UINT64_C(1) << 6) +#define cpu_feature_sse4_2 (UINT64_C(1) << 5) #define QT_FUNCTION_TARGET_STRING_SSE4_2 "sse4.2" -#define CpuFeatureMOVBE (Q_UINT64_C(1) << 7) +#define cpu_feature_movbe (UINT64_C(1) << 6) 
#define QT_FUNCTION_TARGET_STRING_MOVBE "movbe" -#define CpuFeaturePOPCNT (Q_UINT64_C(1) << 8) +#define cpu_feature_popcnt (UINT64_C(1) << 7) #define QT_FUNCTION_TARGET_STRING_POPCNT "popcnt" -#define CpuFeatureAES (Q_UINT64_C(1) << 9) +#define cpu_feature_aes (UINT64_C(1) << 8) #define QT_FUNCTION_TARGET_STRING_AES "aes,sse4.2" -#define CpuFeatureAVX (Q_UINT64_C(1) << 10) +#define cpu_feature_avx (UINT64_C(1) << 9) #define QT_FUNCTION_TARGET_STRING_AVX "avx" -#define CpuFeatureF16C (Q_UINT64_C(1) << 11) -#define QT_FUNCTION_TARGET_STRING_F16C "f16c" -#define CpuFeatureRDRND (Q_UINT64_C(1) << 12) +#define cpu_feature_f16c (UINT64_C(1) << 10) +#define QT_FUNCTION_TARGET_STRING_F16C "f16c,avx" +#define cpu_feature_rdrnd (UINT64_C(1) << 11) #define QT_FUNCTION_TARGET_STRING_RDRND "rdrnd" // in CPUID Leaf 7, Sub-leaf 0, EBX: -#define CpuFeatureBMI (Q_UINT64_C(1) << 13) +#define cpu_feature_bmi (UINT64_C(1) << 12) #define QT_FUNCTION_TARGET_STRING_BMI "bmi" -#define CpuFeatureHLE (Q_UINT64_C(1) << 14) -#define QT_FUNCTION_TARGET_STRING_HLE "hle" -#define CpuFeatureAVX2 (Q_UINT64_C(1) << 15) -#define QT_FUNCTION_TARGET_STRING_AVX2 "avx2" -#define CpuFeatureBMI2 (Q_UINT64_C(1) << 16) +#define cpu_feature_avx2 (UINT64_C(1) << 13) +#define QT_FUNCTION_TARGET_STRING_AVX2 "avx2,avx" +#define cpu_feature_bmi2 (UINT64_C(1) << 14) #define QT_FUNCTION_TARGET_STRING_BMI2 "bmi2" -#define CpuFeatureRTM (Q_UINT64_C(1) << 17) -#define QT_FUNCTION_TARGET_STRING_RTM "rtm" -#define CpuFeatureAVX512F (Q_UINT64_C(1) << 18) -#define QT_FUNCTION_TARGET_STRING_AVX512F "avx512f" -#define CpuFeatureAVX512DQ (Q_UINT64_C(1) << 19) -#define QT_FUNCTION_TARGET_STRING_AVX512DQ "avx512dq" -#define CpuFeatureRDSEED (Q_UINT64_C(1) << 20) +#define cpu_feature_avx512f (UINT64_C(1) << 15) +#define QT_FUNCTION_TARGET_STRING_AVX512F "avx512f,avx" +#define cpu_feature_avx512dq (UINT64_C(1) << 16) +#define QT_FUNCTION_TARGET_STRING_AVX512DQ "avx512dq,avx512f" +#define cpu_feature_rdseed (UINT64_C(1) << 17) 
#define QT_FUNCTION_TARGET_STRING_RDSEED "rdseed" -#define CpuFeatureAVX512IFMA (Q_UINT64_C(1) << 21) -#define QT_FUNCTION_TARGET_STRING_AVX512IFMA "avx512ifma" -#define CpuFeatureAVX512PF (Q_UINT64_C(1) << 22) -#define QT_FUNCTION_TARGET_STRING_AVX512PF "avx512pf" -#define CpuFeatureAVX512ER (Q_UINT64_C(1) << 23) -#define QT_FUNCTION_TARGET_STRING_AVX512ER "avx512er" -#define CpuFeatureAVX512CD (Q_UINT64_C(1) << 24) -#define QT_FUNCTION_TARGET_STRING_AVX512CD "avx512cd" -#define CpuFeatureSHA (Q_UINT64_C(1) << 25) +#define cpu_feature_avx512ifma (UINT64_C(1) << 18) +#define QT_FUNCTION_TARGET_STRING_AVX512IFMA "avx512ifma,avx512f" +#define cpu_feature_avx512cd (UINT64_C(1) << 19) +#define QT_FUNCTION_TARGET_STRING_AVX512CD "avx512cd,avx512f" +#define cpu_feature_sha (UINT64_C(1) << 20) #define QT_FUNCTION_TARGET_STRING_SHA "sha" -#define CpuFeatureAVX512BW (Q_UINT64_C(1) << 26) -#define QT_FUNCTION_TARGET_STRING_AVX512BW "avx512bw" -#define CpuFeatureAVX512VL (Q_UINT64_C(1) << 27) -#define QT_FUNCTION_TARGET_STRING_AVX512VL "avx512vl" +#define cpu_feature_avx512bw (UINT64_C(1) << 21) +#define QT_FUNCTION_TARGET_STRING_AVX512BW "avx512bw,avx512f" +#define cpu_feature_avx512vl (UINT64_C(1) << 22) +#define QT_FUNCTION_TARGET_STRING_AVX512VL "avx512vl,avx512f" // in CPUID Leaf 7, Sub-leaf 0, ECX: -#define CpuFeatureAVX512VBMI (Q_UINT64_C(1) << 28) -#define QT_FUNCTION_TARGET_STRING_AVX512VBMI "avx512vbmi" -#define CpuFeatureAVX512VBMI2 (Q_UINT64_C(1) << 29) -#define QT_FUNCTION_TARGET_STRING_AVX512VBMI2 "avx512vbmi2" -#define CpuFeatureGFNI (Q_UINT64_C(1) << 30) +#define cpu_feature_avx512vbmi (UINT64_C(1) << 23) +#define QT_FUNCTION_TARGET_STRING_AVX512VBMI "avx512vbmi,avx512f" +#define cpu_feature_avx512vbmi2 (UINT64_C(1) << 24) +#define QT_FUNCTION_TARGET_STRING_AVX512VBMI2 "avx512vbmi2,avx512f" +#define cpu_feature_shstk (UINT64_C(1) << 25) +#define QT_FUNCTION_TARGET_STRING_SHSTK "shstk" +#define cpu_feature_gfni (UINT64_C(1) << 26) #define 
QT_FUNCTION_TARGET_STRING_GFNI "gfni" -#define CpuFeatureVAES (Q_UINT64_C(1) << 31) -#define QT_FUNCTION_TARGET_STRING_VAES "vaes" -#define CpuFeatureAVX512VNNI (Q_UINT64_C(1) << 32) -#define QT_FUNCTION_TARGET_STRING_AVX512VNNI "avx512vnni" -#define CpuFeatureAVX512BITALG (Q_UINT64_C(1) << 33) -#define QT_FUNCTION_TARGET_STRING_AVX512BITALG "avx512bitalg" -#define CpuFeatureAVX512VPOPCNTDQ (Q_UINT64_C(1) << 34) -#define QT_FUNCTION_TARGET_STRING_AVX512VPOPCNTDQ "avx512vpopcntdq" +#define cpu_feature_vaes (UINT64_C(1) << 27) +#define QT_FUNCTION_TARGET_STRING_VAES "vaes,avx2,avx,aes" +#define cpu_feature_avx512vnni (UINT64_C(1) << 28) +#define QT_FUNCTION_TARGET_STRING_AVX512VNNI "avx512vnni,avx512f" +#define cpu_feature_avx512bitalg (UINT64_C(1) << 29) +#define QT_FUNCTION_TARGET_STRING_AVX512BITALG "avx512bitalg,avx512f" +#define cpu_feature_avx512vpopcntdq (UINT64_C(1) << 30) +#define QT_FUNCTION_TARGET_STRING_AVX512VPOPCNTDQ "avx512vpopcntdq,avx512f" // in CPUID Leaf 7, Sub-leaf 0, EDX: -#define CpuFeatureAVX5124NNIW (Q_UINT64_C(1) << 35) -#define QT_FUNCTION_TARGET_STRING_AVX5124NNIW "avx5124nniw" -#define CpuFeatureAVX5124FMAPS (Q_UINT64_C(1) << 36) -#define QT_FUNCTION_TARGET_STRING_AVX5124FMAPS "avx5124fmaps" +#define cpu_feature_hybrid (UINT64_C(1) << 31) +#define QT_FUNCTION_TARGET_STRING_HYBRID "hybrid" +#define cpu_feature_ibt (UINT64_C(1) << 32) +#define QT_FUNCTION_TARGET_STRING_IBT "ibt" +#define cpu_feature_avx512fp16 (UINT64_C(1) << 33) +#define QT_FUNCTION_TARGET_STRING_AVX512FP16 "avx512fp16,avx512f,f16c" -static const quint64 qCompilerCpuFeatures = 0 +// CPU architectures +#define cpu_x86_64 (0 \ + | cpu_feature_sse2) +#define cpu_core2 (cpu_x86_64 \ + | cpu_feature_sse3 \ + | cpu_feature_ssse3) +#define cpu_nhm (cpu_core2 \ + | cpu_feature_sse4_1 \ + | cpu_feature_sse4_2 \ + | cpu_feature_popcnt) +#define cpu_wsm (cpu_nhm) +#define cpu_snb (cpu_wsm \ + | cpu_feature_avx) +#define cpu_ivb (cpu_snb \ + | cpu_feature_f16c \ + | cpu_feature_rdrnd) 
+#define cpu_hsw (cpu_ivb \ + | cpu_feature_avx2 \ + | cpu_feature_fma \ + | cpu_feature_bmi \ + | cpu_feature_bmi2 \ + | cpu_feature_movbe) +#define cpu_bdw (cpu_hsw \ + | cpu_feature_rdseed) +#define cpu_bdx (cpu_bdw) +#define cpu_skl (cpu_bdw) +#define cpu_adl (cpu_skl \ + | cpu_feature_gfni \ + | cpu_feature_vaes \ + | cpu_feature_shstk \ + | cpu_feature_ibt) +#define cpu_skx (cpu_skl \ + | cpu_feature_avx512f \ + | cpu_feature_avx512dq \ + | cpu_feature_avx512cd \ + | cpu_feature_avx512bw \ + | cpu_feature_avx512vl) +#define cpu_clx (cpu_skx \ + | cpu_feature_avx512vnni) +#define cpu_cpx (cpu_clx) +#define cpu_cnl (cpu_skx \ + | cpu_feature_avx512ifma \ + | cpu_feature_avx512vbmi) +#define cpu_icl (cpu_cnl \ + | cpu_feature_avx512vbmi2 \ + | cpu_feature_gfni \ + | cpu_feature_vaes \ + | cpu_feature_avx512vnni \ + | cpu_feature_avx512bitalg \ + | cpu_feature_avx512vpopcntdq) +#define cpu_icx (cpu_icl) +#define cpu_tgl (cpu_icl \ + | cpu_feature_shstk \ + | cpu_feature_ibt) +#define cpu_spr (cpu_tgl) +#define cpu_slm (cpu_wsm \ + | cpu_feature_rdrnd \ + | cpu_feature_movbe) +#define cpu_glm (cpu_slm \ + | cpu_feature_rdseed) +#define cpu_tnt (cpu_glm \ + | cpu_feature_gfni) +#define cpu_nehalem (cpu_nhm) +#define cpu_westmere (cpu_wsm) +#define cpu_sandybridge (cpu_snb) +#define cpu_ivybridge (cpu_ivb) +#define cpu_haswell (cpu_hsw) +#define cpu_broadwell (cpu_bdw) +#define cpu_skylake (cpu_skl) +#define cpu_skylake_avx512 (cpu_skx) +#define cpu_cascadelake (cpu_clx) +#define cpu_cooperlake (cpu_cpx) +#define cpu_cannonlake (cpu_cnl) +#define cpu_icelake_client (cpu_icl) +#define cpu_icelake_server (cpu_icx) +#define cpu_alderlake (cpu_adl) +#define cpu_sapphirerapids (cpu_spr) +#define cpu_tigerlake (cpu_tgl) +#define cpu_silvermont (cpu_slm) +#define cpu_goldmont (cpu_glm) +#define cpu_tremont (cpu_tnt) + +static const uint64_t _compilerCpuFeatures = 0 #ifdef __SSE2__ - | CpuFeatureSSE2 + | cpu_feature_sse2 #endif #ifdef __SSE3__ - | CpuFeatureSSE3 + | 
cpu_feature_sse3 #endif #ifdef __SSSE3__ - | CpuFeatureSSSE3 + | cpu_feature_ssse3 #endif #ifdef __FMA__ - | CpuFeatureFMA + | cpu_feature_fma #endif #ifdef __SSE4_1__ - | CpuFeatureSSE4_1 + | cpu_feature_sse4_1 #endif #ifdef __SSE4_2__ - | CpuFeatureSSE4_2 + | cpu_feature_sse4_2 #endif #ifdef __MOVBE__ - | CpuFeatureMOVBE + | cpu_feature_movbe #endif #ifdef __POPCNT__ - | CpuFeaturePOPCNT + | cpu_feature_popcnt #endif #ifdef __AES__ - | CpuFeatureAES + | cpu_feature_aes #endif #ifdef __AVX__ - | CpuFeatureAVX + | cpu_feature_avx #endif #ifdef __F16C__ - | CpuFeatureF16C + | cpu_feature_f16c #endif #ifdef __RDRND__ - | CpuFeatureRDRND + | cpu_feature_rdrnd #endif #ifdef __BMI__ - | CpuFeatureBMI -#endif -#ifdef __HLE__ - | CpuFeatureHLE + | cpu_feature_bmi #endif #ifdef __AVX2__ - | CpuFeatureAVX2 + | cpu_feature_avx2 #endif #ifdef __BMI2__ - | CpuFeatureBMI2 -#endif -#ifdef __RTM__ - | CpuFeatureRTM + | cpu_feature_bmi2 #endif #ifdef __AVX512F__ - | CpuFeatureAVX512F + | cpu_feature_avx512f #endif #ifdef __AVX512DQ__ - | CpuFeatureAVX512DQ + | cpu_feature_avx512dq #endif #ifdef __RDSEED__ - | CpuFeatureRDSEED + | cpu_feature_rdseed #endif #ifdef __AVX512IFMA__ - | CpuFeatureAVX512IFMA -#endif -#ifdef __AVX512PF__ - | CpuFeatureAVX512PF -#endif -#ifdef __AVX512ER__ - | CpuFeatureAVX512ER + | cpu_feature_avx512ifma #endif #ifdef __AVX512CD__ - | CpuFeatureAVX512CD + | cpu_feature_avx512cd #endif #ifdef __SHA__ - | CpuFeatureSHA + | cpu_feature_sha #endif #ifdef __AVX512BW__ - | CpuFeatureAVX512BW + | cpu_feature_avx512bw #endif #ifdef __AVX512VL__ - | CpuFeatureAVX512VL + | cpu_feature_avx512vl #endif #ifdef __AVX512VBMI__ - | CpuFeatureAVX512VBMI + | cpu_feature_avx512vbmi #endif #ifdef __AVX512VBMI2__ - | CpuFeatureAVX512VBMI2 + | cpu_feature_avx512vbmi2 +#endif +#ifdef __SHSTK__ + | cpu_feature_shstk #endif #ifdef __GFNI__ - | CpuFeatureGFNI + | cpu_feature_gfni #endif #ifdef __VAES__ - | CpuFeatureVAES + | cpu_feature_vaes #endif #ifdef __AVX512VNNI__ - | 
CpuFeatureAVX512VNNI + | cpu_feature_avx512vnni #endif #ifdef __AVX512BITALG__ - | CpuFeatureAVX512BITALG + | cpu_feature_avx512bitalg #endif #ifdef __AVX512VPOPCNTDQ__ - | CpuFeatureAVX512VPOPCNTDQ + | cpu_feature_avx512vpopcntdq #endif -#ifdef __AVX5124NNIW__ - | CpuFeatureAVX5124NNIW +#ifdef __HYBRID__ + | cpu_feature_hybrid #endif -#ifdef __AVX5124FMAPS__ - | CpuFeatureAVX5124FMAPS +#ifdef __IBT__ + | cpu_feature_ibt +#endif +#ifdef __AVX512FP16__ + | cpu_feature_avx512fp16 #endif ; -QT_END_NAMESPACE +#if (defined __cplusplus) && __cplusplus >= 201103L +enum X86CpuFeatures : uint64_t { + CpuFeatureSSE2 = cpu_feature_sse2, ///< Streaming SIMD Extensions 2 + CpuFeatureSSE3 = cpu_feature_sse3, ///< Streaming SIMD Extensions 3 + CpuFeatureSSSE3 = cpu_feature_ssse3, ///< Supplemental Streaming SIMD Extensions 3 + CpuFeatureFMA = cpu_feature_fma, ///< Fused Multiply-Add + CpuFeatureSSE4_1 = cpu_feature_sse4_1, ///< Streaming SIMD Extensions 4.1 + CpuFeatureSSE4_2 = cpu_feature_sse4_2, ///< Streaming SIMD Extensions 4.2 + CpuFeatureMOVBE = cpu_feature_movbe, ///< MOV Big Endian + CpuFeaturePOPCNT = cpu_feature_popcnt, ///< Population count + CpuFeatureAES = cpu_feature_aes, ///< Advanced Encryption Standard + CpuFeatureAVX = cpu_feature_avx, ///< Advanced Vector Extensions + CpuFeatureF16C = cpu_feature_f16c, ///< 16-bit Float Conversion + CpuFeatureRDRND = cpu_feature_rdrnd, ///< Random number generator + CpuFeatureBMI = cpu_feature_bmi, ///< Bit Manipulation Instructions + CpuFeatureAVX2 = cpu_feature_avx2, ///< Advanced Vector Extensions 2 + CpuFeatureBMI2 = cpu_feature_bmi2, ///< Bit Manipulation Instructions 2 + CpuFeatureAVX512F = cpu_feature_avx512f, ///< AVX512 Foundation + CpuFeatureAVX512DQ = cpu_feature_avx512dq, ///< AVX512 Double & Quadword + CpuFeatureRDSEED = cpu_feature_rdseed, ///< Random number generator for seeding + CpuFeatureAVX512IFMA = cpu_feature_avx512ifma, ///< AVX512 Integer Fused Multiply-Add + CpuFeatureAVX512CD = cpu_feature_avx512cd, 
///< AVX512 Conflict Detection + CpuFeatureSHA = cpu_feature_sha, ///< SHA-1 and SHA-256 instructions + CpuFeatureAVX512BW = cpu_feature_avx512bw, ///< AVX512 Byte & Word + CpuFeatureAVX512VL = cpu_feature_avx512vl, ///< AVX512 Vector Length + CpuFeatureAVX512VBMI = cpu_feature_avx512vbmi, ///< AVX512 Vector Byte Manipulation Instructions + CpuFeatureAVX512VBMI2 = cpu_feature_avx512vbmi2, ///< AVX512 Vector Byte Manipulation Instructions 2 + CpuFeatureSHSTK = cpu_feature_shstk, ///< Control Flow Enforcement Technology Shadow Stack + CpuFeatureGFNI = cpu_feature_gfni, ///< Galois Field new instructions + CpuFeatureVAES = cpu_feature_vaes, ///< 256- and 512-bit AES + CpuFeatureAVX512VNNI = cpu_feature_avx512vnni, ///< AVX512 Vector Neural Network Instructions + CpuFeatureAVX512BITALG = cpu_feature_avx512bitalg, ///< AVX512 Bit Algorithms + CpuFeatureAVX512VPOPCNTDQ = cpu_feature_avx512vpopcntdq, ///< AVX512 Population Count + CpuFeatureHYBRID = cpu_feature_hybrid, ///< Hybrid processor + CpuFeatureIBT = cpu_feature_ibt, ///< Control Flow Enforcement Technology Indirect Branch Tracking + CpuFeatureAVX512FP16 = cpu_feature_avx512fp16, ///< AVX512 16-bit Floating Point +}; // enum X86CpuFeatures -#endif // QSIMD_X86_P_H +enum X86CpuArchitectures : uint64_t { + CpuArchx8664 = cpu_x86_64, + CpuArchCore2 = cpu_core2, + CpuArchNHM = cpu_nhm, + CpuArchWSM = cpu_wsm, + CpuArchSNB = cpu_snb, + CpuArchIVB = cpu_ivb, + CpuArchHSW = cpu_hsw, + CpuArchBDW = cpu_bdw, + CpuArchBDX = cpu_bdx, + CpuArchSKL = cpu_skl, + CpuArchADL = cpu_adl, + CpuArchSKX = cpu_skx, + CpuArchCLX = cpu_clx, + CpuArchCPX = cpu_cpx, + CpuArchCNL = cpu_cnl, + CpuArchICL = cpu_icl, + CpuArchICX = cpu_icx, + CpuArchTGL = cpu_tgl, + CpuArchSPR = cpu_spr, + CpuArchSLM = cpu_slm, + CpuArchGLM = cpu_glm, + CpuArchTNT = cpu_tnt, + CpuArchNehalem = cpu_nehalem, ///< Intel Core i3/i5/i7 + CpuArchWestmere = cpu_westmere, ///< Intel Core i3/i5/i7 + CpuArchSandyBridge = cpu_sandybridge, ///< Second Generation Intel 
Core i3/i5/i7 + CpuArchIvyBridge = cpu_ivybridge, ///< Third Generation Intel Core i3/i5/i7 + CpuArchHaswell = cpu_haswell, ///< Fourth Generation Intel Core i3/i5/i7 + CpuArchBroadwell = cpu_broadwell, ///< Fifth Generation Intel Core i3/i5/i7 + CpuArchSkylake = cpu_skylake, ///< Sixth Generation Intel Core i3/i5/i7 + CpuArchSkylakeAvx512 = cpu_skylake_avx512, ///< Intel Xeon Scalable + CpuArchCascadeLake = cpu_cascadelake, ///< Second Generation Intel Xeon Scalable + CpuArchCooperLake = cpu_cooperlake, ///< Third Generation Intel Xeon Scalable + CpuArchCannonLake = cpu_cannonlake, ///< Intel Core i3-8121U + CpuArchIceLakeClient = cpu_icelake_client, ///< Tenth Generation Intel Core i3/i5/i7 + CpuArchIceLakeServer = cpu_icelake_server, ///< Third Generation Intel Xeon Scalable + CpuArchAlderLake = cpu_alderlake, + CpuArchSapphireRapids = cpu_sapphirerapids, + CpuArchTigerLake = cpu_tigerlake, ///< Eleventh Generation Intel Core i3/i5/i7 + CpuArchSilvermont = cpu_silvermont, + CpuArchGoldmont = cpu_goldmont, + CpuArchTremont = cpu_tremont, +}; // enum X86cpuArchitectures +#endif /* C++11 */ + +#endif /* QSIMD_X86_P_H */ diff --git a/util/x86simdgen/.gitignore b/util/x86simdgen/.gitignore new file mode 100644 index 00000000000..e9c3a83ddeb --- /dev/null +++ b/util/x86simdgen/.gitignore @@ -0,0 +1 @@ +qsimd_x86_p.h diff --git a/util/x86simdgen/3rdparty/simd-amd.conf b/util/x86simdgen/3rdparty/simd-amd.conf new file mode 100644 index 00000000000..dd7c214207b --- /dev/null +++ b/util/x86simdgen/3rdparty/simd-amd.conf @@ -0,0 +1,35 @@ +# -*- mode: conf; indent-tabs-mode: t -*- +# Feature CPUID function Bit Required feature +#mmxext Leaf80000001hEDX 22 # AMD extensions to MMX +#rdtscp Leaf80000001hEDX 27 # RDTSCP instruction +#3dnow Leaf80000001hEDX 31 # 3DNow! instructions +#3dnowext Leaf80000001hEDX 30 # AMD extensions to 3DNow! 
+lzcnt Leaf80000001hECX 5 # Leading Zero Count +sse4a Leaf80000001hECX 6 # SSE4a +xop Leaf80000001hECX 11 # eXtended Operations +fma4 Leaf80000001hECX 16 # 4-operand Fused Multiply-Add +tbm Leaf80000001hECX 21 # Trailing Bit Manipulation +clzero Leaf80000008hEBX 0 # Cacheline clear and write zero +wbnoinvd Leaf80000008hEBX 9 # Write Back with No Invalidate + +# Processor/arch listing below this line +# Source: GCC gcc/common/config/i386/i386-common.c +# Source: Wikipedia +# Architecture Based on New features +arch=AmdFam10h x86_64 sse3,sse4a,cx16,popcnt,lzcnt # AMD K10 +arch=BtVer1 AmdFam10h xsave # AMD Bobcat v1 +arch=BtVer2 BtVer1 ssse3,sse4.1,sse4.2,avx,bmi,f16c,movbe,xsaveopt # AMD Bobcat v2 +arch=BdVer1 BtVer1 ssse3,sse4.1,sse4.2,avx,xop,fma4 +arch=BdVer2 BdVer1 bmi,f16c,fma,tbm +arch=BdVer3 BdVer2 fsgsbase,xsaveopt +arch=BdVer4 BdVer3 avx2,bmi2,rdrnd,movbe +arch=ZnVer1 BdVer4 adx,rdseed,clzero,clflushopt,xsavec,xsaves +arch=ZnVer2 ZnVer1 clwb,wbnoinvd + +arch=Barcelona AmdFam10h +arch=Bulldozer BdVer1 # AMD Bulldozer +arch=Piledriver BdVer2 # AMD Bulldozer v2 (Piledriver) +arch=Steamroller BdVer3 # AMD Bulldozer v3 (Steamroller) +arch=Excavator BdVer4 # AMD Bulldozer v4 (Excavator) +arch=Zen ZnVer1 # AMD Zen +arch=Zen2 ZnVer2 # AMD Zen2 diff --git a/util/x86simdgen/3rdparty/simd-intel.conf b/util/x86simdgen/3rdparty/simd-intel.conf new file mode 100644 index 00000000000..0db4259f974 --- /dev/null +++ b/util/x86simdgen/3rdparty/simd-intel.conf @@ -0,0 +1,179 @@ +# -*- mode: conf; indent-tabs-mode: t -*- +# Feature CPUID function Bit Required feature +#sep Leaf01EDX 11 # Sysenter/sysexit +#cmov Leaf01EDX 15 # Conditional Move +#clflush Leaf01EDX 19 # Cache-Line Flush +#mmx Leaf01EDX 23 # Multi Media Extensions +#fxsr Leaf01EDX 24 # FXSAVE instruction +#sse Leaf01EDX 25 # Streaming SIMD Extensions +sse2 Leaf01EDX 26 # Streaming SIMD Extensions 2 +# -- everything above this line is mandatory on x86-64 -- +sse3 Leaf01ECX 0 # Streaming SIMD Extensions 3 +#pclmul 
Leaf01ECX 1 # Carryless Multiply +ssse3 Leaf01ECX 9 # Supplemental Streaming SIMD Extensions 3 +fma Leaf01ECX 12 # Fused Multiply-Add +#cx16 Leaf01ECX 13 # Compare-Exchange 16 bytes +sse4.1 Leaf01ECX 19 # Streaming SIMD Extensions 4.1 +sse4.2 Leaf01ECX 20 # Streaming SIMD Extensions 4.2 +movbe Leaf01ECX 22 # MOV Big Endian +popcnt Leaf01ECX 23 # Population count +aes Leaf01ECX 25 sse4.2 # Advanced Encryption Standard +#xsave Leaf01ECX 26 # XSAVE, XGETBV instructions +#osxsave Leaf01ECX 27 # XSAVE enabled by OS +avx Leaf01ECX 28 # Advanced Vector Extensions +f16c Leaf01ECX 29 avx # 16-bit Float Conversion +rdrnd Leaf01ECX 30 # Random number generator +#hypervisor Leaf01ECX 31 # Running on a hypervisor +#fsgsbase Leaf07_00EBX 0 # FS/GS base access +bmi Leaf07_00EBX 3 # Bit Manipulation Instructions +#hle Leaf07_00EBX 4 # Hardware Lock Elision +avx2 Leaf07_00EBX 5 avx # Advanced Vector Extensions 2 +bmi2 Leaf07_00EBX 8 # Bit Manipulation Instructions 2 +#erms Leaf07_00EBX 9 # Enhanced REP MOVSB/STOSB +#rtm Leaf07_00EBX 11 # Restricted Transactional Memory +#rdt_m Leaf07_00EBX 12 # Resource Director Technology (RDT) Monitoring +#mpx Leaf07_00EBX 14 # Memory Protection Extensions +#rdt_a Leaf07_00EBX 15 # Resource Director Technology (RDT) Allocation +avx512f Leaf07_00EBX 16 avx # AVX512 Foundation +avx512dq Leaf07_00EBX 17 avx512f # AVX512 Double & Quadword +rdseed Leaf07_00EBX 18 # Random number generator for seeding +#adx Leaf07_00EBX 19 # Multi-Precision Add-Carry +avx512ifma Leaf07_00EBX 21 avx512f # AVX512 Integer Fused Multiply-Add +#clflushopt Leaf07_00EBX 23 # Cache-Line Flush Optimized +#clwb Leaf07_00EBX 24 # Cache-Line Write Back +#avx512pf Leaf07_00EBX 26 avx512f # AVX512 Prefetch +#avx512er Leaf07_00EBX 27 avx512f # AVX512 Exponential & Reciprocal +avx512cd Leaf07_00EBX 28 avx512f # AVX512 Conflict Detection +sha Leaf07_00EBX 29 # SHA-1 and SHA-256 instructions +avx512bw Leaf07_00EBX 30 avx512f # AVX512 Byte & Word +avx512vl Leaf07_00EBX 31 avx512f # 
AVX512 Vector Length +avx512vbmi Leaf07_00ECX 1 avx512f # AVX512 Vector Byte Manipulation Instructions +#pku Leaf07_00ECX 3 # Protection Keys for User mode +#ospke Leaf07_00ECX 4 # Protection Keys Enabled by OS +#waitpkg Leaf07_00ECX 5 # User-Level Monitor / Wait +avx512vbmi2 Leaf07_00ECX 6 avx512f # AVX512 Vector Byte Manipulation Instructions 2 +shstk Leaf07_00ECX 7 # Control Flow Enforcement Technology Shadow Stack +gfni Leaf07_00ECX 8 # Galois Field new instructions +vaes Leaf07_00ECX 9 avx2,avx,aes # 256- and 512-bit AES +#vpclmulqdq Leaf07_00ECX 10 avx # 256- and 512-bit Carryless Multiply +avx512vnni Leaf07_00ECX 11 avx512f # AVX512 Vector Neural Network Instructions +avx512bitalg Leaf07_00ECX 12 avx512f # AVX512 Bit Algorithms +avx512vpopcntdq Leaf07_00ECX 14 avx512f # AVX512 Population Count +#la57 Leaf07_00ECX 16 # 5-level page tables +#rdpid Leaf07_00ECX 22 # RDPID instruction +#cldemote Leaf07_00ECX 25 # Cache Line Demotion +#movdiri Leaf07_00ECX 27 # Move Direct-store Integer +#movdir64b Leaf07_00ECX 28 # Move Direct-store 64 bytes +#enqcmd Leaf07_00ECX 29 # Enqueue Command +#pks Leaf07_00ECX 31 # Protection Keys for Supervisor mode +#avx5124nniw Leaf07_00EDX 2 avx512f # AVX512 4-iteration Vector Neural Network Instructions +#avx5124fmaps Leaf07_00EDX 3 avx512f # AVX512 4-iteration Fused Multiply Accumulation +#fsrm Leaf07_00EDX 4 # Fast Short REP MOV +#uintr Leaf07_00EDX 5 # User interrupts +#avx512vp2intersect Leaf07_00EDX 8 avx512f # AVX512 Intersection computation +#serialize Leaf07_00EDX 14 # SERIALIZE instruction +hybrid Leaf07_00EDX 15 # Hybrid processor +#tsxldtrk Leaf07_00EDX 16 # TDX (RTM) Suspend Load Address Tracking +#pconfig Leaf07_00EDX 18 # Platform configuration +ibt Leaf07_00EDX 20 # Control Flow Enforcement Technology Indirect Branch Tracking +#amxbf16 Leaf07_00EDX 22 amxtile # AMX Tile multiplication in BFloat16 +avx512fp16 Leaf07_00EDX 23 avx512f,f16c # AVX512 16-bit Floating Point +#amxtile Leaf07_00EDX 24 # Advanced Matrix 
Extensions Tile support +#amxint8 Leaf07_00EDX 25 amxtile # AMX Tile multiplication for Int8 +#avxvnni Leaf07_01EAX 4 avx # AVX (VEX-encoded) versions of the Vector Neural Network Instructions +#avx512bf16 Leaf07_01EAX 5 avx512f # AVX512 Brain Float16 +#zlmovsb Leaf07_01EAX 10 # Zero-length MOVSB +#fsrs Leaf07_01EAX 11 # Fast Short (REP?) STOSB +#fsrc Leaf07_01EAX 12 # Fast Short (REP?) CMPSB, SCASB +#fred Leaf07_01EAX 17 # Flexible Return and Event Delivery +#lkgs Leaf07_01EAX 18 # Load into Kernel GS +#lam Leaf07_01EAX 26 # Linear Address Masking +#xsaveopt Leaf13_01EAX 0 # Optimized XSAVE +#xsavec Leaf13_01EAX 1 # XSAVE with Compaction +#xgetbv1 Leaf13_01EAX 2 # XGETBV with ECX=1 +#xsaves Leaf13_01EAX 3 # XSAVE Supervisor mode +#xfd Leaf13_01EAX 4 # eXtended Feature Disable MSR +#lzcnt Leaf80000001hECX 5 # Leading Zero Count + +# XSAVE states +# Source: Intel Software Development Manual, Volume 1, Chapter 13 +# Source: Intel Instruction Set Extensions Manual (ed. 041), Chapter 3, "Intel AMX Instruction Set" +# Grouping Value Required for +xsave=X87 0x0001 # X87 and MMX state +xsave=SseState 0x0002 sse # SSE: 128 bits of XMM registers +xsave=Ymm_Hi128 0x0004 # AVX: high 128 bits in YMM registers +xsave=Bndregs 0x0008 # Memory Protection Extensions +xsave=Bndcsr 0x0010 # Memory Protection Extensions +xsave=OpMask 0x0020 # AVX512: k0 through k7 +xsave=Zmm_Hi256 0x0040 # AVX512: high 256 bits of ZMM0-15 +xsave=Hi16_Zmm 0x0080 # AVX512: all 512 bits of ZMM16-31 +xsave=PTState 0x0100 # Processor Trace +xsave=PKRUState 0x0200 pku # Protection Key +# ??? 0x0400 +xsave=CetUState 0x0800 # CET: user mode +xsave=CetSState 0x1000 # CET: supervisor mode +xsave=HdcState 0x2000 # Hardware Duty Cycle +xsave=UintrState 0x4000 uintr # User Interrupts +# ??? 
0x8000 +xsave=HwpState 0x10000 # Hardware P-State +xsave=Xtilecfg 0x20000 # AMX: XTILECFG register +xsave=Xtiledata 0x40000 # AMX: data in the tiles +xsave=AvxState SseState|Ymm_Hi128 avx,fma,avx512f +xsave=MPXState Bndregs|Bndcsr mpx +xsave=Avx512State AvxState|OpMask|Zmm_Hi256|Hi16_Zmm avx512f +xsave=CetState CetUState|CetSState shstk +xsave=AmxState Xtilecfg|Xtiledata amxtile + +# Processor/arch listing below this line +# Source: Intel Instruction Set Extension manual, section 1.2 +# Source: GCC gcc/config/i386/i386.h, i386-c.c, i386-builtins.c +# Architecture Based on New features Optional features +arch=x86_64 <> sse2 + # Core line +arch=Core2 x86_64 sse3,ssse3,cx16 +arch=NHM Core2 sse4.1,sse4.2,popcnt +arch=WSM NHM +arch=SNB WSM avx +arch=IVB SNB f16c,rdrnd,fsgsbase +arch=HSW IVB avx2,fma,bmi,bmi2,lzcnt,movbe +arch=BDW HSW adx,rdseed +arch=BDX BDW +arch=SKL BDW xsavec,xsaves +arch=ADL SKL avxvnni,gfni,vaes,vpclmulqdq,serialize,shstk,cldemote,movdiri,movdir64b,ibt,waitpkg,keylocker rdpid +arch=SKX SKL avx512f,avx512dq,avx512cd,avx512bw,avx512vl clwb +arch=CLX SKX avx512vnni +arch=CPX CLX avx512bf16 +arch=CNL SKX avx512ifma,avx512vbmi sha +arch=ICL CNL avx512vbmi2,gfni,vaes,vpclmulqdq,avx512vnni,avx512bitalg,avx512vpopcntdq fsrm,rdpid +arch=ICX ICL pconfig +arch=TGL ICL avx512vp2intersect,shstk,,movdiri,movdir64b,ibt,keylocker +arch=SPR TGL avx512bf16,amxtile,amxbf16,amxint8,avxvnni,cldemote,pconfig,waitpkg,serialize,tsxldtrk,uintr + # Atom line +arch=SLM WSM rdrnd,movbe +arch=GLM SLM fsgsbase,rdseed,lzcnt,xsavec,xsaves +arch=TNT GLM clwb,gfni,cldemote,waitpkg,movdiri,movdir64b + # Xeon Phi line +#arch=KNL SKL avx512f,avx512er,avx512pf,avx512cd +#arch=KNM KNL avx5124fmaps,avx5124vnniw,avx512vpopcntdq + # Longer names +arch=Nehalem NHM # Intel Core i3/i5/i7 +arch=Westmere WSM # Intel Core i3/i5/i7 +arch=SandyBridge SNB # Second Generation Intel Core i3/i5/i7 +arch=IvyBridge IVB # Third Generation Intel Core i3/i5/i7 +arch=Haswell HSW # Fourth Generation Intel 
Core i3/i5/i7 +arch=Broadwell BDW # Fifth Generation Intel Core i3/i5/i7 +arch=Skylake SKL # Sixth Generation Intel Core i3/i5/i7 +arch=Skylake-Avx512 SKX # Intel Xeon Scalable +arch=CascadeLake CLX # Second Generation Intel Xeon Scalable +arch=CooperLake CPX # Third Generation Intel Xeon Scalable +arch=CannonLake CNL # Intel Core i3-8121U +arch=IceLake-Client ICL # Tenth Generation Intel Core i3/i5/i7 +arch=IceLake-Server ICX # Third Generation Intel Xeon Scalable +arch=AlderLake ADL +arch=SapphireRapids SPR +arch=TigerLake TGL # Eleventh Generation Intel Core i3/i5/i7 +arch=Silvermont SLM +arch=Goldmont GLM +arch=Tremont TNT +#arch=KnightsLanding KNL +#arch=KnightsMill KNM diff --git a/util/x86simdgen/3rdparty/x86simd_generate.pl b/util/x86simdgen/3rdparty/x86simd_generate.pl new file mode 100755 index 00000000000..a07f858dcc9 --- /dev/null +++ b/util/x86simdgen/3rdparty/x86simd_generate.pl @@ -0,0 +1,329 @@ +#!/usr/bin/env perl + +# SPDX-License-Identifier: Apache-2.0 + +use strict; +$\ = "\n"; +$/ = "\n"; +my $debug = 0; +my %leaves = ( + Leaf01ECX => "CPUID Leaf 1, ECX", + Leaf07_00EBX => "CPUID Leaf 7, Sub-leaf 0, EBX", + Leaf07_00ECX => "CPUID Leaf 7, Sub-leaf 0, ECX", + Leaf07_00EDX => "CPUID Leaf 7, Sub-leaf 0, EDX", + Leaf07_01EAX => "CPUID Leaf 7, Sub-leaf 1, EAX", + Leaf13_01EAX => "CPUID Leaf 13, Sub-leaf 1, EAX", + Leaf80000001hECX => "CPUID Leaf 80000001h, ECX", + Leaf80000008hEBX => "CPUID Leaf 80000008h, EBX", +); +my @leafNames = sort keys %leaves; + +# out of order (we want it first) +unshift @leafNames, "Leaf01EDX"; +$leaves{Leaf01EDX} = "CPUID Leaf 1, EDX"; + +# Read input from file specified by first argument +my $input_conf_file = shift @ARGV; +open(FH, '<', $input_conf_file) or die $!; + +my $i = 0; +my @features; +my @architecture_names; +my %architectures; +my @xsaveStates; +my $maxarchnamelen = 0; +while (<FH>) { + chomp $_; + m/#\s*(.*)\s*/; + my $comment = $1; + + s/#.*$//; + s/^\s+//; + next if $_ eq ""; + + if (s/^arch=//) { + my ($arch,
$based, $f) = split /\s+/; + die("Unknown base architecture \"$based\"") + unless $based eq "<>" or grep {$_ eq $based} @architecture_names; + my $id = lc($arch); + $id =~ s/[^A-Za-z0-9_]/_/g; + + my $prettyname = $arch; + $prettyname =~ s/\B([A-Z])/ $1/g; + $prettyname =~ s/-(\w+)/ ($1)/g; + $maxarchnamelen = length($prettyname) if length($prettyname) > $maxarchnamelen; + + my @basefeatures; + my @extrafeatures; + @basefeatures = @{$architectures{$based}->{allfeatures}} if $based ne "<>"; + @extrafeatures = @{$architectures{$arch}{features}} if defined($architectures{$arch}); + @extrafeatures = (@extrafeatures, split(',', $f)); + my @allfeatures = sort (@basefeatures, @extrafeatures); + + $architectures{$arch} = { + name => $arch, + prettyname => $prettyname, + id => $id, + base => $based, + features => \@extrafeatures, + allfeatures => \@allfeatures, + comment => $comment + }; + push @architecture_names, $arch + unless grep {$_ eq $arch} @architecture_names; + } elsif (s/^xsave=//) { + my ($name, $value, $required) = split /\s+/; + push @xsaveStates, + { id => $name, value => $value, required_for => $required, comment => $comment }; + } else { + my ($name, $function, $bit, $depends) = split /\s+/; + die("Unknown CPUID function \"$function\"") + unless grep {$_ eq $function} @leafNames; + if (my @match = grep { $_->{name} eq $name } @features) { + die("internal error") if scalar @match != 1; + next if $match[0]->{leaf} eq $function && + $match[0]->{bit} eq $bit && $match[0]->{depends} eq $depends; + die("Duplicate feature \"$name\" with different details. " .
+ "Previously was $match[0]->{leaf} bit $match[0]->{bit}."); + } + + my $id = uc($name); + $id =~ s/[^A-Z0-9_]/_/g; + push @features, + { name => $name, depends => $depends, id => $id, bit => $bit, leaf => $function, comment => $comment }; + ++$i; + die("Too many features to fit a 64-bit integer") if $i > 64; + } +} +close FH; + +# Print the header output +my $headername = ""; +my $headerguard = ""; +if ($headername = shift @ARGV) { + + $headerguard = uc($headername); + $headerguard =~ s/[^A-Z0-9_]/_/g; + + print qq|// This is a generated file. DO NOT EDIT. +// Please see $0 +#ifndef $headerguard +#define $headerguard + +#include <stdint.h>|; +} else { + $debug = 1; +} + +# Print the feature list +my $lastleaf; +for (my $i = 0; $i < scalar @features; ++$i) { + my $feature = $features[$i]; + # Leaf header: + printf "\n// in %s:\n", $leaves{$feature->{leaf}} + if $feature->{leaf} ne $lastleaf; + $lastleaf = $feature->{leaf}; + + # Feature + printf "#define cpu_feature_%-31s (UINT64_C(1) << %d)\n", lc($feature->{id}), $i; + + # Feature string names for Clang and GCC + my $str = $feature->{name} . ',' . $feature->{depends}; + $str =~ s/,$//; + printf "#define QT_FUNCTION_TARGET_STRING_%-17s \"%s\"\n", + $feature->{id}, $str; +} + +# Print the architecture list +print "\n// CPU architectures"; +for (@architecture_names) { + my $arch = $architectures{$_}; + my $base = $arch->{base}; + if ($base eq "<>") { + $base = "0"; + } else { + $base =~ s/[^A-Za-z0-9_]/_/g; + $base = "cpu_" .
$base; + } + + printf "#define cpu_%-19s (%s", lc($arch->{id}), lc($base); + + for my $f (@{$arch->{features}}) { + my @match = grep { $_->{name} eq $f } @features; + if (scalar @match == 1) { + printf " \\\n%33s| cpu_feature_%s", " ", lc($match[0]->{id}); + } else { + printf STDERR "%s: unknown feature '%s' for CPU '%s'\n", $0, $f, $arch->{name} + if $debug; + } + } + print ")"; +} + +print q{ +static const uint64_t _compilerCpuFeatures = 0}; + +# And print the compiler-enabled features part: +for (my $i = 0; $i < scalar @features; ++$i) { + my $feature = $features[$i]; + printf + "#ifdef __%s__\n" . + " | cpu_feature_%s\n" . + "#endif\n", + $feature->{id}, lc($feature->{id}); +} + +print ' ;'; +if ($headerguard ne "") { + print q| +#if (defined __cplusplus) && __cplusplus >= 201103L +enum X86CpuFeatures : uint64_t {|; + + for (@features) { + my $line = sprintf "CpuFeature%s = cpu_feature_%s,", $_->{id}, lc($_->{id}); + if ($_->{comment} ne "") { + printf " %-56s ///< %s\n", $line, $_->{comment}; + } else { + print " $line"; + } + } + +print qq|}; // enum X86CpuFeatures + +enum X86CpuArchitectures : uint64_t {|; + + for (@architecture_names) { + my $arch = $architectures{$_}; + my $name = $arch->{name}; + $name =~ s/[^A-Za-z0-9]//g; + my $line = sprintf "CpuArch%s = cpu_%s,", $name, lc($arch->{id}); + if ($arch->{comment} ne "") { + printf " %-56s ///< %s\n", $line, $arch->{comment}; + } else { + print " $line"; + } + } + + print qq|}; // enum X86cpuArchitectures +#endif /* C++11 */\n|; +}; + +print "// -- implementation start --\n"; +# Now generate the string table and bit-location array +my $offset = 0; +my @offsets; +print "static const char features_string[] ="; +for my $feature (@features) { + print " \" $feature->{name}\\0\""; + push @offsets, $offset; + $offset += 2 + length($feature->{name}); +} +print " \"\\0\";"; + +# Print the string offset table +printf "\nstatic const %s features_indices[] = {", + $offset > 255 ? 
"uint16_t" : "uint8_t"; +for (my $j = 0; $j < scalar @offsets; ++$j) { + printf "%s%3d,", + $j % 8 ? " " : "\n ", $offsets[$j]; +} +print "\n};"; + +# Print the locator enum and table +print "\nenum X86CpuidLeaves {"; +map { print " $_," } @leafNames; +print " X86CpuidMaxLeaf\n};"; + +my $type = scalar %leaves > 8 ? "uint16_t" : "uint8_t"; +printf "\nstatic const %s x86_locators[] = {\n", + $type, $type; +for (my $j = 0; $j < scalar @features; ++$j) { + my $feature = $features[$j]; + printf " %s*32 + %2d, %s// %s\n", + $feature->{leaf}, $feature->{bit}, ' ' x (24 - length($feature->{leaf})), $feature->{name}; +} +print '};'; + +# Generate the processor name listing, sorted by feature length +my %sorted_archs; +for (@architecture_names) { + my $arch = $architectures{$_}; + my $key = sprintf "%02d_%s", scalar(@{$arch->{allfeatures}}), join(',', @{$arch->{allfeatures}}); + $sorted_archs{$key} = $arch; +} +print qq| +struct X86Architecture +{ + uint64_t features; + char name[$maxarchnamelen + 1]; +}; + +static const struct X86Architecture x86_architectures[] = {|; +for (sort { $b <=> $a } keys %sorted_archs) { + my $arch = $sorted_archs{$_}; + next if $arch->{base} eq "<>"; + printf " { cpu_%s, \"%s\" },\n", $arch->{id}, $arch->{prettyname}; +} +print "};"; + +# Produce the list of XSAVE states +print "\nenum XSaveBits {"; +my $xsaveEnumPrefix = "XSave_"; +for my $state (@xsaveStates) { + my $value = $state->{value}; + unless ($value =~ /^0x/) { + # Compound value + $value = join(" | ", map { $xsaveEnumPrefix . 
$_ } split(/\|/, $value)); + } + printf " %s%-12s = %s,", $xsaveEnumPrefix, $state->{id}, $value; + printf "%s// %s", ' ' x (18 - length($value)), $state->{comment} + if $state->{comment} ne ''; + printf "\n"; +}; +print "};"; + +# Produce a list of features require extended XSAVE state +my $xsaveRequirementMapping; +for my $state (@xsaveStates) { + my $xsaveReqPrefix = "XSaveReq_"; + my @required_for = split /,/, $state->{required_for}; + next unless scalar @required_for; + + my $prefix = sprintf "\n// List of features requiring %s%s\nstatic const uint64_t %s%s = 0", + $xsaveEnumPrefix, $state->{id}, $xsaveReqPrefix, $state->{id}; + + # match either the feature name or one of its requirements against list + # of features that this state is required for + for my $feature (@features) { + my $id = lc($feature->{id}); + my $required = 0; + for my $requirement (@required_for) { + my @depends = split /,/, "$id," . $feature->{depends}; + $required = grep { $_ eq $requirement } @depends; + last if $required; + } + printf "$prefix\n | cpu_feature_%s", $id if $required; + $prefix = "" if $required; + } + + if ($prefix eq "") { + # we printed something + print ";"; + $xsaveRequirementMapping .= sprintf " { %s%s, %s%s },\n", + $xsaveReqPrefix, $state->{id}, $xsaveEnumPrefix, $state->{id}; + } +} + +# Finally, make a table +printf qq| +struct XSaveRequirementMapping +{ + uint64_t cpu_features; + uint64_t xsave_state; +}; + +static const struct XSaveRequirementMapping xsave_requirements[] = { +%s}; + +// -- implementation end -- +#endif /* $headerguard */\n|, $xsaveRequirementMapping if $xsaveRequirementMapping ne ""; diff --git a/util/x86simdgen/Makefile b/util/x86simdgen/Makefile new file mode 100644 index 00000000000..a727af42178 --- /dev/null +++ b/util/x86simdgen/Makefile @@ -0,0 +1,19 @@ +GENERATOR = 3rdparty/x86simd_generate.pl +TARGETDIR = ../../src/corelib/global/ +TARGETCPP = qsimd_x86.cpp +TARGETHEADER = qsimd_x86_p.h + +CONF_FILES = 3rdparty/simd-intel.conf +# We 
don't currently use any feature from simd-amd.conf +# CONF_FILES += 3rdparty/simd-amd.conf + +all: $(TARGETDIR)/$(TARGETHEADER) $(TARGETDIR)/$(TARGETCPP) +$(TARGETHEADER): $(CONF_FILES) | $(GENERATOR) + cat $^ | perl $(GENERATOR) /dev/stdin $@ > $@ +$(TARGETDIR)/$(TARGETHEADER): header $(TARGETHEADER) + sed '/-- implementation start --/,/-- implementation end --/d' $^ > $@ +$(TARGETDIR)/$(TARGETCPP): $(TARGETHEADER) header + (cat header; echo '#include "$(TARGETHEADER)"'; sed '1,/-- implementation start --/d;/-- implementation end --/,$$d' $<) > $@ + +clean: + -$(RM) $(TARGETHEADER) diff --git a/util/x86simdgen/README.md b/util/x86simdgen/README.md new file mode 100644 index 00000000000..83f554a84ec --- /dev/null +++ b/util/x86simdgen/README.md @@ -0,0 +1,13 @@ +# Scripts to regenerate the x86 SIMD flags + +Upstream: https://github.com/opendcdiag/opendcdiag +License: Apache-2.0 + +The .conf files are meant to be edited and the options we want to use +are uncommented. + +To regenerate: + make + +Note: the license of the script does not affect the produced output's +license. Therefore, no qt_attribution.json file is provided. diff --git a/util/x86simdgen/generate.pl b/util/x86simdgen/generate.pl deleted file mode 100755 index b3e7e99298c..00000000000 --- a/util/x86simdgen/generate.pl +++ /dev/null @@ -1,277 +0,0 @@ -#!/usr/bin/env perl -############################################################################# -## -## Copyright (C) 2018 Intel Corporation. -## Contact: https://www.qt.io/licensing/ -## -## This file is part of the build configuration tools of the Qt Toolkit. 
-## -## $QT_BEGIN_LICENSE:MIT$ -## Permission is hereby granted, free of charge, to any person obtaining a copy -## of this software and associated documentation files (the "Software"), to deal -## in the Software without restriction, including without limitation the rights -## to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -## copies of the Software, and to permit persons to whom the Software is -## furnished to do so, subject to the following conditions: -## -## The above copyright notice and this permission notice shall be included in -## all copies or substantial portions of the Software. -## -## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -## AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -## LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -## OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -## THE SOFTWARE. 
-## $QT_END_LICENSE$ -## -############################################################################# - -use strict; -$\ = "\n"; -$/ = "\n"; -my %leaves = ( - Leaf1EDX => "CPUID Leaf 1, EDX", - Leaf1ECX => "CPUID Leaf 1, ECX", - Leaf7_0EBX => "CPUID Leaf 7, Sub-leaf 0, EBX", - Leaf7_0ECX => "CPUID Leaf 7, Sub-leaf 0, ECX", - Leaf7_0EDX => "CPUID Leaf 7, Sub-leaf 0, EDX", -); -my @leafNames = sort keys %leaves; - -# Read data from stdin -my $i = 1; -my @features; -while () { - s/#.*$//; - chomp; - next if $_ eq ""; - - my ($name, $function, $bit, $depends) = split /\s+/; - die("Unknown CPUID function \"$function\"") - unless grep $function, @leafNames; - - my $id = uc($name); - $id =~ s/[^A-Z0-9_]/_/g; - push @features, - { name => $name, depends => $depends, id => $id, bit => $bit, leaf => $function }; - ++$i; -} - -if (my $h = shift @ARGV) { - open HEADER, ">", $h; - select HEADER; -} - -# Print the qsimd_x86_p.h output -print q{/**************************************************************************** -** -** Copyright (C) 2018 Intel Corporation. -** Contact: https://www.qt.io/licensing/ -** -** This file is part of the QtCore module of the Qt Toolkit. -** -** $QT_BEGIN_LICENSE:LGPL$ -** Commercial License Usage -** Licensees holding valid commercial Qt licenses may use this file in -** accordance with the commercial license agreement provided with the -** Software or, alternatively, in accordance with the terms contained in -** a written agreement between you and The Qt Company. For licensing terms -** and conditions see https://www.qt.io/terms-conditions. For further -** information use the contact form at https://www.qt.io/contact-us. -** -** GNU Lesser General Public License Usage -** Alternatively, this file may be used under the terms of the GNU Lesser -** General Public License version 3 as published by the Free Software -** Foundation and appearing in the file LICENSE.LGPL3 included in the -** packaging of this file. 
Please review the following information to -** ensure the GNU Lesser General Public License version 3 requirements -** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. -** -** GNU General Public License Usage -** Alternatively, this file may be used under the terms of the GNU -** General Public License version 2.0 or (at your option) the GNU General -** Public license version 3 or any later version approved by the KDE Free -** Qt Foundation. The licenses are as published by the Free Software -** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 -** included in the packaging of this file. Please review the following -** information to ensure the GNU General Public License requirements will -** be met: https://www.gnu.org/licenses/gpl-2.0.html and -** https://www.gnu.org/licenses/gpl-3.0.html. -** -** $QT_END_LICENSE$ -** -****************************************************************************/ - -// This is a generated file. DO NOT EDIT. -// Please see util/x86simdgen/generate.pl"; -#ifndef QSIMD_P_H -# error "Please include instead" -#endif -#ifndef QSIMD_X86_P_H -#define QSIMD_X86_P_H - -#include "qsimd_p.h" - -// -// W A R N I N G -// ------------- -// -// This file is not part of the Qt API. It exists purely as an -// implementation detail. This header file may change from version to -// version without notice, or even be removed. -// -// We mean it. 
-// - -QT_BEGIN_NAMESPACE - -// used only to indicate that the CPU detection was initialized -#define QSimdInitialized (Q_UINT64_C(1) << 0)}; - -# Print the enum -my $lastleaf; -for (my $i = 0; $i < scalar @features; ++$i) { - my $feature = $features[$i]; - # Leaf header: - printf "\n// in %s:\n", $leaves{$feature->{leaf}} - if $feature->{leaf} ne $lastleaf; - $lastleaf = $feature->{leaf}; - - # Feature - printf "#define CpuFeature%-33s (Q_UINT64_C(1) << %d)\n", $feature->{id}, $i + 1; - - # Feature string names for Clang and GCC - my $str = $feature->{name}; - $str .= ",$feature->{depends}" if defined($feature->{depends}); - printf "#define QT_FUNCTION_TARGET_STRING_%-17s \"%s\"\n", - $feature->{id}, $str; -} - -print q{ -static const quint64 qCompilerCpuFeatures = 0}; - -# And print the compiler-enabled features part: -for (my $i = 0; $i < scalar @features; ++$i) { - my $feature = $features[$i]; - printf - "#ifdef __%s__\n" . - " | CpuFeature%s\n" . - "#endif\n", - $feature->{id}, $feature->{id}; -} - -print q{ ; - -QT_END_NAMESPACE - -#endif // QSIMD_X86_P_H -}; - -if (my $cpp = shift @ARGV) { - open CPP, ">", $cpp; - select CPP; -} else { - print q{ - ----- cut here, paste the rest into qsimd_x86.cpp --- - - -}; -}; - -print q{/**************************************************************************** -** -** Copyright (C) 2018 Intel Corporation. -** Contact: https://www.qt.io/licensing/ -** -** This file is part of the QtCore module of the Qt Toolkit. -** -** $QT_BEGIN_LICENSE:LGPL$ -** Commercial License Usage -** Licensees holding valid commercial Qt licenses may use this file in -** accordance with the commercial license agreement provided with the -** Software or, alternatively, in accordance with the terms contained in -** a written agreement between you and The Qt Company. For licensing terms -** and conditions see https://www.qt.io/terms-conditions. For further -** information use the contact form at https://www.qt.io/contact-us. 
-** -** GNU Lesser General Public License Usage -** Alternatively, this file may be used under the terms of the GNU Lesser -** General Public License version 3 as published by the Free Software -** Foundation and appearing in the file LICENSE.LGPL3 included in the -** packaging of this file. Please review the following information to -** ensure the GNU Lesser General Public License version 3 requirements -** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. -** -** GNU General Public License Usage -** Alternatively, this file may be used under the terms of the GNU -** General Public License version 2.0 or (at your option) the GNU General -** Public license version 3 or any later version approved by the KDE Free -** Qt Foundation. The licenses are as published by the Free Software -** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 -** included in the packaging of this file. Please review the following -** information to ensure the GNU General Public License requirements will -** be met: https://www.gnu.org/licenses/gpl-2.0.html and -** https://www.gnu.org/licenses/gpl-3.0.html. -** -** $QT_END_LICENSE$ -** -****************************************************************************/ - -// This is a generated file. DO NOT EDIT. -// Please see util/x86simdgen/generate.pl"; -#include "qsimd_p.h" -}; - -# Now generate the string table and bit-location array -my $offset = 0; -my @offsets; -print "static const char features_string[] ="; -for my $feature (@features) { - print " \" $feature->{name}\\0\""; - push @offsets, $offset; - $offset += 2 + length($feature->{name}); -} -print " \"\\0\";"; - -# Print the string offset table -printf "\nstatic const %s features_indices[] = {\n %3d", - $offset > 255 ? "quint16" : "quint8", $offset; -for (my $j = 0; $j < scalar @offsets; ++$j) { - printf ",%s%3d", - ($j + 1) % 8 ? 
" " : "\n ", $offsets[$j]; -} -print "\n};"; - -# Print the locator enum and table -print "\nenum X86CpuidLeaves {"; -map { print " $_," } @leafNames; -print " X86CpuidMaxLeaf\n};"; - -my $type = scalar %leaves > 8 ? "quint16" : "quint8"; -printf "\nstatic const %s x86_locators[] = {", - $type, $type; -my $lastname; -for (my $j = 0; $j < scalar @features; ++$j) { - my $feature = $features[$j]; - printf ", // %s", $lastname - if defined($lastname); - printf "\n %s*32 + %2d", - $feature->{leaf}, $feature->{bit}; - $lastname = $feature->{name}; -} -printf qq{ // $lastname -\}; - -// List of AVX512 features (see detectProcessorFeatures()) -static const quint64 AllAVX512 = 0}; - -# Print AVX512 features -for (my $j = 0; $j < scalar @features; ++$j) { - my $feature = $features[$j]; - $_ = $feature->{id}; - printf "\n | CpuFeature%s", $_ if /AVX512/; -} -print ";"; diff --git a/util/x86simdgen/header b/util/x86simdgen/header new file mode 100644 index 00000000000..163f0454087 --- /dev/null +++ b/util/x86simdgen/header @@ -0,0 +1,39 @@ +/**************************************************************************** +** +** Copyright (C) 2022 Intel Corporation. +** Contact: https://www.qt.io/licensing/ +** +** This file is part of the QtCore module of the Qt Toolkit. +** +** $QT_BEGIN_LICENSE:LGPL$ +** Commercial License Usage +** Licensees holding valid commercial Qt licenses may use this file in +** accordance with the commercial license agreement provided with the +** Software or, alternatively, in accordance with the terms contained in +** a written agreement between you and The Qt Company. For licensing terms +** and conditions see https://www.qt.io/terms-conditions. For further +** information use the contact form at https://www.qt.io/contact-us. 
+** +** GNU Lesser General Public License Usage +** Alternatively, this file may be used under the terms of the GNU Lesser +** General Public License version 3 as published by the Free Software +** Foundation and appearing in the file LICENSE.LGPL3 included in the +** packaging of this file. Please review the following information to +** ensure the GNU Lesser General Public License version 3 requirements +** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. +** +** GNU General Public License Usage +** Alternatively, this file may be used under the terms of the GNU +** General Public License version 2.0 or (at your option) the GNU General +** Public license version 3 or any later version approved by the KDE Free +** Qt Foundation. The licenses are as published by the Free Software +** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 +** included in the packaging of this file. Please review the following +** information to ensure the GNU General Public License requirements will +** be met: https://www.gnu.org/licenses/gpl-2.0.html and +** https://www.gnu.org/licenses/gpl-3.0.html. 
+** +** $QT_END_LICENSE$ +** +****************************************************************************/ + diff --git a/util/x86simdgen/simd.txt b/util/x86simdgen/simd.txt deleted file mode 100644 index 1fce7b9497f..00000000000 --- a/util/x86simdgen/simd.txt +++ /dev/null @@ -1,37 +0,0 @@ -# Feature CPUID function Bit Required feature -sse2 Leaf1EDX 26 -sse3 Leaf1ECX 0 -ssse3 Leaf1ECX 9 -fma Leaf1ECX 12 -sse4.1 Leaf1ECX 19 -sse4.2 Leaf1ECX 20 -movbe Leaf1ECX 22 -popcnt Leaf1ECX 23 -aes Leaf1ECX 25 sse4.2 -avx Leaf1ECX 28 -f16c Leaf1ECX 29 -rdrnd Leaf1ECX 30 -bmi Leaf7_0EBX 3 -hle Leaf7_0EBX 4 -avx2 Leaf7_0EBX 5 -bmi2 Leaf7_0EBX 8 -rtm Leaf7_0EBX 11 -avx512f Leaf7_0EBX 16 -avx512dq Leaf7_0EBX 17 -rdseed Leaf7_0EBX 18 -avx512ifma Leaf7_0EBX 21 -avx512pf Leaf7_0EBX 26 -avx512er Leaf7_0EBX 27 -avx512cd Leaf7_0EBX 28 -sha Leaf7_0EBX 29 -avx512bw Leaf7_0EBX 30 -avx512vl Leaf7_0EBX 31 -avx512vbmi Leaf7_0ECX 1 -avx512vbmi2 Leaf7_0ECX 6 -gfni Leaf7_0ECX 8 -vaes Leaf7_0ECX 9 -avx512vnni Leaf7_0ECX 11 -avx512bitalg Leaf7_0ECX 12 -avx512vpopcntdq Leaf7_0ECX 14 -avx5124nniw Leaf7_0EDX 2 -avx5124fmaps Leaf7_0EDX 3