qsimd: update the generator script from OpenDCDiag

I'd been making changes to that and improving it for the past 2 years
without bringing it back into Qt.

The list of features is mostly the same, except:
- removed TSX features
- removed features specific to Xeon Phi processors
- added CET and AVX512FP16 features
- added the bit for hybrid CPU detection

See matching update at https://github.com/opendcdiag/opendcdiag/pull/49

Change-Id: I6fcda969a9e9427198bffffd16ce860b5a38aece
Reviewed-by: Allan Sandfeld Jensen <allan.jensen@qt.io>
This commit is contained in:
Thiago Macieira 2022-01-28 11:31:37 -08:00
parent 05428d9b97
commit b852584556
13 changed files with 1049 additions and 549 deletions

View File

@ -350,22 +350,6 @@ static void xgetbv(uint in, uint &eax, uint &edx)
#endif
}
// Flags from the XCR0 state register
// The first eight enumerators are single bits (bit 0 through bit 7); the
// *State enumerators at the end are combinations of those bits.
enum XCR0Flags {
X87 = 1 << 0,
XMM0_15 = 1 << 1,
YMM0_15Hi128 = 1 << 2,
BNDRegs = 1 << 3,
BNDCSR = 1 << 4,
OpMask = 1 << 5,
ZMM0_15Hi256 = 1 << 6,
ZMM16_31 = 1 << 7,
// Combined masks built from the single-bit flags above.
SSEState = XMM0_15,
AVXState = XMM0_15 | YMM0_15Hi128,
AVX512State = AVXState | OpMask | ZMM0_15Hi256 | ZMM16_31
};
QT_FUNCTION_TARGET_BASELINE
static quint64 adjustedXcr0(quint64 xcr0)
{
@ -386,7 +370,7 @@ static quint64 adjustedXcr0(quint64 xcr0)
constexpr quintptr cpu_capabilities64 = commpage + 0x10;
quint64 capab = *reinterpret_cast<quint64 *>(cpu_capabilities64);
if (capab & kHasAVX512F)
xcr0 |= AVX512State;
xcr0 |= XSave_Avx512State;
#endif
return xcr0;
@ -395,9 +379,6 @@ static quint64 adjustedXcr0(quint64 xcr0)
QT_FUNCTION_TARGET_BASELINE
static quint64 detectProcessorFeatures()
{
static const quint64 AllAVX = AllAVX512 | CpuFeatureAVX | CpuFeatureAVX2 | CpuFeatureF16C
| CpuFeatureFMA | CpuFeatureVAES;
quint64 features = 0;
int cpuidLevel = maxBasicCpuidSupported();
#if Q_PROCESSOR_X86 < 5
@ -408,38 +389,35 @@ static quint64 detectProcessorFeatures()
#endif
uint results[X86CpuidMaxLeaf] = {};
cpuidFeatures01(results[Leaf1ECX], results[Leaf1EDX]);
cpuidFeatures01(results[Leaf01ECX], results[Leaf01EDX]);
if (cpuidLevel >= 7)
cpuidFeatures07_00(results[Leaf7_0EBX], results[Leaf7_0ECX], results[Leaf7_0EDX]);
cpuidFeatures07_00(results[Leaf07_00EBX], results[Leaf07_00ECX], results[Leaf07_00EDX]);
// populate our feature list
for (uint i = 0; i < sizeof(x86_locators) / sizeof(x86_locators[0]); ++i) {
for (uint i = 0; i < std::size(x86_locators); ++i) {
uint word = x86_locators[i] / 32;
uint bit = 1U << (x86_locators[i] % 32);
quint64 feature = Q_UINT64_C(1) << (i + 1);
quint64 feature = Q_UINT64_C(1) << i;
if (results[word] & bit)
features |= feature;
}
// now check the AVX state
quint64 xcr0 = 0;
if (results[Leaf1ECX] & (1u << 27)) {
if (results[Leaf01ECX] & (1u << 27)) {
// XGETBV enabled
uint xgetbvA = 0, xgetbvD = 0;
xgetbv(0, xgetbvA, xgetbvD);
xcr0 = xgetbvA;
if (sizeof(XCR0Flags) > sizeof(xgetbvA))
if (sizeof(XSaveBits) > sizeof(xgetbvA))
xcr0 |= quint64(xgetbvD) << 32;
xcr0 = adjustedXcr0(xcr0);
}
if ((xcr0 & AVXState) != AVXState) {
// support for YMM registers is disabled, disable all AVX
features &= ~AllAVX;
} else if ((xcr0 & AVX512State) != AVX512State) {
// support for ZMM registers or mask registers is disabled, disable all AVX512
features &= ~AllAVX512;
for (auto req : xsave_requirements) {
if ((xcr0 & req.xsave_state) != req.xsave_state)
features &= ~req.cpu_features;
}
if (features & CpuFeatureRDRND && !checkRdrndWorks())

View File

@ -246,10 +246,12 @@ asm(
# define __haswell__ 1
# endif
QT_BEGIN_NAMESPACE
static const quint64 qCompilerCpuFeatures = _compilerCpuFeatures;
// This constant does not include all CPU features found in a Haswell, only
// those that we'd have optimized code for.
// Note: must use Q_CONSTEXPR here, as this file may be compiled in C mode.
QT_BEGIN_NAMESPACE
static const quint64 CpuFeatureArchHaswell = 0
| CpuFeatureSSE2
| CpuFeatureSSE3

View File

@ -1,6 +1,6 @@
/****************************************************************************
**
** Copyright (C) 2018 Intel Corporation.
** Copyright (C) 2022 Intel Corporation.
** Contact: https://www.qt.io/licensing/
**
** This file is part of the QtCore module of the Qt Toolkit.
@ -37,9 +37,7 @@
**
****************************************************************************/
// This is a generated file. DO NOT EDIT.
// Please see util/x86simdgen/generate.pl
#include "qsimd_p.h"
#include "qsimd_x86_p.h"
static const char features_string[] =
" sse2\0"
@ -55,101 +53,188 @@ static const char features_string[] =
" f16c\0"
" rdrnd\0"
" bmi\0"
" hle\0"
" avx2\0"
" bmi2\0"
" rtm\0"
" avx512f\0"
" avx512dq\0"
" rdseed\0"
" avx512ifma\0"
" avx512pf\0"
" avx512er\0"
" avx512cd\0"
" sha\0"
" avx512bw\0"
" avx512vl\0"
" avx512vbmi\0"
" avx512vbmi2\0"
" shstk\0"
" gfni\0"
" vaes\0"
" avx512vnni\0"
" avx512bitalg\0"
" avx512vpopcntdq\0"
" avx5124nniw\0"
" avx5124fmaps\0"
" hybrid\0"
" ibt\0"
" avx512fp16\0"
"\0";
static const quint16 features_indices[] = {
306, 0, 6, 12, 19, 24, 32, 40,
47, 55, 60, 65, 71, 78, 83, 88,
94, 100, 105, 114, 124, 132, 144, 154,
164, 174, 179, 189, 199, 211, 224, 230,
236, 248, 262, 279, 292
static const uint16_t features_indices[] = {
0, 6, 12, 19, 24, 32, 40, 47,
55, 60, 65, 71, 78, 83, 89, 95,
104, 114, 122, 134, 144, 149, 159, 169,
181, 194, 201, 207, 213, 225, 239, 256,
264, 269,
};
enum X86CpuidLeaves {
Leaf1ECX,
Leaf1EDX,
Leaf7_0EBX,
Leaf7_0ECX,
Leaf7_0EDX,
Leaf01EDX,
Leaf01ECX,
Leaf07_00EBX,
Leaf07_00ECX,
Leaf07_00EDX,
Leaf07_01EAX,
Leaf13_01EAX,
Leaf80000001hECX,
Leaf80000008hEBX,
X86CpuidMaxLeaf
};
static const quint8 x86_locators[] = {
Leaf1EDX*32 + 26, // sse2
Leaf1ECX*32 + 0, // sse3
Leaf1ECX*32 + 9, // ssse3
Leaf1ECX*32 + 12, // fma
Leaf1ECX*32 + 19, // sse4.1
Leaf1ECX*32 + 20, // sse4.2
Leaf1ECX*32 + 22, // movbe
Leaf1ECX*32 + 23, // popcnt
Leaf1ECX*32 + 25, // aes
Leaf1ECX*32 + 28, // avx
Leaf1ECX*32 + 29, // f16c
Leaf1ECX*32 + 30, // rdrnd
Leaf7_0EBX*32 + 3, // bmi
Leaf7_0EBX*32 + 4, // hle
Leaf7_0EBX*32 + 5, // avx2
Leaf7_0EBX*32 + 8, // bmi2
Leaf7_0EBX*32 + 11, // rtm
Leaf7_0EBX*32 + 16, // avx512f
Leaf7_0EBX*32 + 17, // avx512dq
Leaf7_0EBX*32 + 18, // rdseed
Leaf7_0EBX*32 + 21, // avx512ifma
Leaf7_0EBX*32 + 26, // avx512pf
Leaf7_0EBX*32 + 27, // avx512er
Leaf7_0EBX*32 + 28, // avx512cd
Leaf7_0EBX*32 + 29, // sha
Leaf7_0EBX*32 + 30, // avx512bw
Leaf7_0EBX*32 + 31, // avx512vl
Leaf7_0ECX*32 + 1, // avx512vbmi
Leaf7_0ECX*32 + 6, // avx512vbmi2
Leaf7_0ECX*32 + 8, // gfni
Leaf7_0ECX*32 + 9, // vaes
Leaf7_0ECX*32 + 11, // avx512vnni
Leaf7_0ECX*32 + 12, // avx512bitalg
Leaf7_0ECX*32 + 14, // avx512vpopcntdq
Leaf7_0EDX*32 + 2, // avx5124nniw
Leaf7_0EDX*32 + 3 // avx5124fmaps
static const uint16_t x86_locators[] = {
Leaf01EDX*32 + 26, // sse2
Leaf01ECX*32 + 0, // sse3
Leaf01ECX*32 + 9, // ssse3
Leaf01ECX*32 + 12, // fma
Leaf01ECX*32 + 19, // sse4.1
Leaf01ECX*32 + 20, // sse4.2
Leaf01ECX*32 + 22, // movbe
Leaf01ECX*32 + 23, // popcnt
Leaf01ECX*32 + 25, // aes
Leaf01ECX*32 + 28, // avx
Leaf01ECX*32 + 29, // f16c
Leaf01ECX*32 + 30, // rdrnd
Leaf07_00EBX*32 + 3, // bmi
Leaf07_00EBX*32 + 5, // avx2
Leaf07_00EBX*32 + 8, // bmi2
Leaf07_00EBX*32 + 16, // avx512f
Leaf07_00EBX*32 + 17, // avx512dq
Leaf07_00EBX*32 + 18, // rdseed
Leaf07_00EBX*32 + 21, // avx512ifma
Leaf07_00EBX*32 + 28, // avx512cd
Leaf07_00EBX*32 + 29, // sha
Leaf07_00EBX*32 + 30, // avx512bw
Leaf07_00EBX*32 + 31, // avx512vl
Leaf07_00ECX*32 + 1, // avx512vbmi
Leaf07_00ECX*32 + 6, // avx512vbmi2
Leaf07_00ECX*32 + 7, // shstk
Leaf07_00ECX*32 + 8, // gfni
Leaf07_00ECX*32 + 9, // vaes
Leaf07_00ECX*32 + 11, // avx512vnni
Leaf07_00ECX*32 + 12, // avx512bitalg
Leaf07_00ECX*32 + 14, // avx512vpopcntdq
Leaf07_00EDX*32 + 15, // hybrid
Leaf07_00EDX*32 + 20, // ibt
Leaf07_00EDX*32 + 23, // avx512fp16
};
// One entry of the x86_architectures table: a CPU feature bitmask paired with
// the display name of the architecture.
struct X86Architecture
{
// Feature bitmask for the architecture.
uint64_t features;
// Name string; the buffer holds up to 17 characters plus a terminating NUL.
char name[17 + 1];
};
// Table of known architectures: each entry pairs a cpu_* feature mask with the
// name to display for it.
// NOTE(review): the entry order looks deliberate (architectures with larger
// feature sets come first) -- confirm with the code that scans this table
// before reordering.
static const struct X86Architecture x86_architectures[] = {
{ cpu_sapphirerapids, "Sapphire Rapids" },
{ cpu_tigerlake, "Tiger Lake" },
{ cpu_icelake_server, "Ice Lake (Server)" },
{ cpu_icelake_client, "Ice Lake (Client)" },
{ cpu_alderlake, "Alder Lake" },
{ cpu_cooperlake, "Cooper Lake" },
{ cpu_cannonlake, "Cannon Lake" },
{ cpu_cascadelake, "Cascade Lake" },
{ cpu_skylake_avx512, "Skylake (Avx512)" },
{ cpu_skylake, "Skylake" },
{ cpu_tremont, "Tremont" },
{ cpu_broadwell, "Broadwell" },
{ cpu_haswell, "Haswell" },
{ cpu_goldmont, "Goldmont" },
{ cpu_ivybridge, "Ivy Bridge" },
{ cpu_silvermont, "Silvermont" },
{ cpu_sandybridge, "Sandy Bridge" },
{ cpu_westmere, "Westmere" },
{ cpu_core2, "Core2" },
};
// XSAVE state-component bits. Every plain enumerator is a single bit, written
// here as a shift so the bit index is visible; the enumerators in the last
// group are unions of the single bits that belong to one feature family.
enum XSaveBits {
    // --- individual state components ---
    XSave_X87        = 1 << 0,      // x87 FPU and MMX registers
    XSave_SseState   = 1 << 1,      // SSE: the 128-bit XMM registers
    XSave_Ymm_Hi128  = 1 << 2,      // AVX: upper 128 bits of the YMM registers
    XSave_Bndregs    = 1 << 3,      // Memory Protection Extensions
    XSave_Bndcsr     = 1 << 4,      // Memory Protection Extensions
    XSave_OpMask     = 1 << 5,      // AVX512: mask registers k0 through k7
    XSave_Zmm_Hi256  = 1 << 6,      // AVX512: upper 256 bits of ZMM0-15
    XSave_Hi16_Zmm   = 1 << 7,      // AVX512: full 512 bits of ZMM16-31
    XSave_PTState    = 1 << 8,      // Processor Trace
    XSave_PKRUState  = 1 << 9,      // Protection Key
    // (no enumerator is defined for bit 10)
    XSave_CetUState  = 1 << 11,     // CET: user mode
    XSave_CetSState  = 1 << 12,     // CET: supervisor mode
    XSave_HdcState   = 1 << 13,     // Hardware Duty Cycle
    XSave_UintrState = 1 << 14,     // User Interrupts
    // (no enumerator is defined for bit 15)
    XSave_HwpState   = 1 << 16,     // Hardware P-State
    XSave_Xtilecfg   = 1 << 17,     // AMX: XTILECFG register
    XSave_Xtiledata  = 1 << 18,     // AMX: tile data

    // --- combined masks ---
    XSave_AvxState    = XSave_SseState | XSave_Ymm_Hi128,
    XSave_MPXState    = XSave_Bndregs | XSave_Bndcsr,
    XSave_Avx512State = XSave_AvxState | XSave_OpMask | XSave_Zmm_Hi256 | XSave_Hi16_Zmm,
    XSave_CetState    = XSave_CetUState | XSave_CetSState,
    XSave_AmxState    = XSave_Xtilecfg | XSave_Xtiledata,
};
// List of features requiring XSave_AvxState
// (this mask is paired with XSave_AvxState in the xsave_requirements table).
// It covers the AVX512 features as well as the AVX-level ones.
static const uint64_t XSaveReq_AvxState = 0
| cpu_feature_fma
| cpu_feature_avx
| cpu_feature_f16c
| cpu_feature_avx2
| cpu_feature_avx512f
| cpu_feature_avx512dq
| cpu_feature_avx512ifma
| cpu_feature_avx512cd
| cpu_feature_avx512bw
| cpu_feature_avx512vl
| cpu_feature_avx512vbmi
| cpu_feature_avx512vbmi2
| cpu_feature_vaes
| cpu_feature_avx512vnni
| cpu_feature_avx512bitalg
| cpu_feature_avx512vpopcntdq
| cpu_feature_avx512fp16;
// List of features requiring XSave_Avx512State
// (this mask is paired with XSave_Avx512State in the xsave_requirements table).
static const uint64_t XSaveReq_Avx512State = 0
| cpu_feature_avx512f
| cpu_feature_avx512dq
| cpu_feature_avx512ifma
| cpu_feature_avx512cd
| cpu_feature_avx512bw
| cpu_feature_avx512vl
| cpu_feature_avx512vbmi
| cpu_feature_avx512vbmi2
| cpu_feature_avx512vnni
| cpu_feature_avx512bitalg
| cpu_feature_avx512vpopcntdq
| cpu_feature_avx512fp16;
// List of features requiring XSave_CetState
// (this mask is paired with XSave_CetState in the xsave_requirements table).
// Only the shadow-stack feature bit is listed here.
static const uint64_t XSaveReq_CetState = 0
| cpu_feature_shstk;
// Associates a set of CPU feature bits with the XSAVE state bits that those
// features need.
struct XSaveRequirementMapping
{
// Mask of cpu_feature_* bits covered by this requirement.
uint64_t cpu_features;
// Mask of XSave_* state bits the features above depend on.
uint64_t xsave_state;
};
// Requirement table: each row pairs an XSaveReq_* feature mask with the
// XSave_* state mask it depends on. detectProcessorFeatures() iterates over
// this table and clears a row's features when the row's state bits are not
// all set in XCR0.
static const struct XSaveRequirementMapping xsave_requirements[] = {
{ XSaveReq_AvxState, XSave_AvxState },
{ XSaveReq_Avx512State, XSave_Avx512State },
{ XSaveReq_CetState, XSave_CetState },
};
// List of AVX512 features (see detectProcessorFeatures())
// A single mask formed by ORing every CpuFeatureAVX512* flag listed below.
static const quint64 AllAVX512 = 0
| CpuFeatureAVX512F
| CpuFeatureAVX512DQ
| CpuFeatureAVX512IFMA
| CpuFeatureAVX512PF
| CpuFeatureAVX512ER
| CpuFeatureAVX512CD
| CpuFeatureAVX512BW
| CpuFeatureAVX512VL
| CpuFeatureAVX512VBMI
| CpuFeatureAVX512VBMI2
| CpuFeatureAVX512VNNI
| CpuFeatureAVX512BITALG
| CpuFeatureAVX512VPOPCNTDQ
| CpuFeatureAVX5124NNIW
| CpuFeatureAVX5124FMAPS;

View File

@ -1,6 +1,6 @@
/****************************************************************************
**
** Copyright (C) 2018 Intel Corporation.
** Copyright (C) 2022 Intel Corporation.
** Contact: https://www.qt.io/licensing/
**
** This file is part of the QtCore module of the Qt Toolkit.
@ -38,224 +38,358 @@
****************************************************************************/
// This is a generated file. DO NOT EDIT.
// Please see util/x86simdgen/generate.pl
#ifndef QSIMD_P_H
# error "Please include <private/qsimd_p.h> instead"
#endif
// Please see 3rdparty/x86simd_generate.pl
#ifndef QSIMD_X86_P_H
#define QSIMD_X86_P_H
#include "qsimd_p.h"
//
// W A R N I N G
// -------------
//
// This file is not part of the Qt API. It exists purely as an
// implementation detail. This header file may change from version to
// version without notice, or even be removed.
//
// We mean it.
//
QT_BEGIN_NAMESPACE
// used only to indicate that the CPU detection was initialized
#define QSimdInitialized (Q_UINT64_C(1) << 0)
#include <stdint.h>
// in CPUID Leaf 1, EDX:
#define CpuFeatureSSE2 (Q_UINT64_C(1) << 1)
#define cpu_feature_sse2 (UINT64_C(1) << 0)
#define QT_FUNCTION_TARGET_STRING_SSE2 "sse2"
// in CPUID Leaf 1, ECX:
#define CpuFeatureSSE3 (Q_UINT64_C(1) << 2)
#define cpu_feature_sse3 (UINT64_C(1) << 1)
#define QT_FUNCTION_TARGET_STRING_SSE3 "sse3"
#define CpuFeatureSSSE3 (Q_UINT64_C(1) << 3)
#define cpu_feature_ssse3 (UINT64_C(1) << 2)
#define QT_FUNCTION_TARGET_STRING_SSSE3 "ssse3"
#define CpuFeatureFMA (Q_UINT64_C(1) << 4)
#define cpu_feature_fma (UINT64_C(1) << 3)
#define QT_FUNCTION_TARGET_STRING_FMA "fma"
#define CpuFeatureSSE4_1 (Q_UINT64_C(1) << 5)
#define cpu_feature_sse4_1 (UINT64_C(1) << 4)
#define QT_FUNCTION_TARGET_STRING_SSE4_1 "sse4.1"
#define CpuFeatureSSE4_2 (Q_UINT64_C(1) << 6)
#define cpu_feature_sse4_2 (UINT64_C(1) << 5)
#define QT_FUNCTION_TARGET_STRING_SSE4_2 "sse4.2"
#define CpuFeatureMOVBE (Q_UINT64_C(1) << 7)
#define cpu_feature_movbe (UINT64_C(1) << 6)
#define QT_FUNCTION_TARGET_STRING_MOVBE "movbe"
#define CpuFeaturePOPCNT (Q_UINT64_C(1) << 8)
#define cpu_feature_popcnt (UINT64_C(1) << 7)
#define QT_FUNCTION_TARGET_STRING_POPCNT "popcnt"
#define CpuFeatureAES (Q_UINT64_C(1) << 9)
#define cpu_feature_aes (UINT64_C(1) << 8)
#define QT_FUNCTION_TARGET_STRING_AES "aes,sse4.2"
#define CpuFeatureAVX (Q_UINT64_C(1) << 10)
#define cpu_feature_avx (UINT64_C(1) << 9)
#define QT_FUNCTION_TARGET_STRING_AVX "avx"
#define CpuFeatureF16C (Q_UINT64_C(1) << 11)
#define QT_FUNCTION_TARGET_STRING_F16C "f16c"
#define CpuFeatureRDRND (Q_UINT64_C(1) << 12)
#define cpu_feature_f16c (UINT64_C(1) << 10)
#define QT_FUNCTION_TARGET_STRING_F16C "f16c,avx"
#define cpu_feature_rdrnd (UINT64_C(1) << 11)
#define QT_FUNCTION_TARGET_STRING_RDRND "rdrnd"
// in CPUID Leaf 7, Sub-leaf 0, EBX:
#define CpuFeatureBMI (Q_UINT64_C(1) << 13)
#define cpu_feature_bmi (UINT64_C(1) << 12)
#define QT_FUNCTION_TARGET_STRING_BMI "bmi"
#define CpuFeatureHLE (Q_UINT64_C(1) << 14)
#define QT_FUNCTION_TARGET_STRING_HLE "hle"
#define CpuFeatureAVX2 (Q_UINT64_C(1) << 15)
#define QT_FUNCTION_TARGET_STRING_AVX2 "avx2"
#define CpuFeatureBMI2 (Q_UINT64_C(1) << 16)
#define cpu_feature_avx2 (UINT64_C(1) << 13)
#define QT_FUNCTION_TARGET_STRING_AVX2 "avx2,avx"
#define cpu_feature_bmi2 (UINT64_C(1) << 14)
#define QT_FUNCTION_TARGET_STRING_BMI2 "bmi2"
#define CpuFeatureRTM (Q_UINT64_C(1) << 17)
#define QT_FUNCTION_TARGET_STRING_RTM "rtm"
#define CpuFeatureAVX512F (Q_UINT64_C(1) << 18)
#define QT_FUNCTION_TARGET_STRING_AVX512F "avx512f"
#define CpuFeatureAVX512DQ (Q_UINT64_C(1) << 19)
#define QT_FUNCTION_TARGET_STRING_AVX512DQ "avx512dq"
#define CpuFeatureRDSEED (Q_UINT64_C(1) << 20)
#define cpu_feature_avx512f (UINT64_C(1) << 15)
#define QT_FUNCTION_TARGET_STRING_AVX512F "avx512f,avx"
#define cpu_feature_avx512dq (UINT64_C(1) << 16)
#define QT_FUNCTION_TARGET_STRING_AVX512DQ "avx512dq,avx512f"
#define cpu_feature_rdseed (UINT64_C(1) << 17)
#define QT_FUNCTION_TARGET_STRING_RDSEED "rdseed"
#define CpuFeatureAVX512IFMA (Q_UINT64_C(1) << 21)
#define QT_FUNCTION_TARGET_STRING_AVX512IFMA "avx512ifma"
#define CpuFeatureAVX512PF (Q_UINT64_C(1) << 22)
#define QT_FUNCTION_TARGET_STRING_AVX512PF "avx512pf"
#define CpuFeatureAVX512ER (Q_UINT64_C(1) << 23)
#define QT_FUNCTION_TARGET_STRING_AVX512ER "avx512er"
#define CpuFeatureAVX512CD (Q_UINT64_C(1) << 24)
#define QT_FUNCTION_TARGET_STRING_AVX512CD "avx512cd"
#define CpuFeatureSHA (Q_UINT64_C(1) << 25)
#define cpu_feature_avx512ifma (UINT64_C(1) << 18)
#define QT_FUNCTION_TARGET_STRING_AVX512IFMA "avx512ifma,avx512f"
#define cpu_feature_avx512cd (UINT64_C(1) << 19)
#define QT_FUNCTION_TARGET_STRING_AVX512CD "avx512cd,avx512f"
#define cpu_feature_sha (UINT64_C(1) << 20)
#define QT_FUNCTION_TARGET_STRING_SHA "sha"
#define CpuFeatureAVX512BW (Q_UINT64_C(1) << 26)
#define QT_FUNCTION_TARGET_STRING_AVX512BW "avx512bw"
#define CpuFeatureAVX512VL (Q_UINT64_C(1) << 27)
#define QT_FUNCTION_TARGET_STRING_AVX512VL "avx512vl"
#define cpu_feature_avx512bw (UINT64_C(1) << 21)
#define QT_FUNCTION_TARGET_STRING_AVX512BW "avx512bw,avx512f"
#define cpu_feature_avx512vl (UINT64_C(1) << 22)
#define QT_FUNCTION_TARGET_STRING_AVX512VL "avx512vl,avx512f"
// in CPUID Leaf 7, Sub-leaf 0, ECX:
#define CpuFeatureAVX512VBMI (Q_UINT64_C(1) << 28)
#define QT_FUNCTION_TARGET_STRING_AVX512VBMI "avx512vbmi"
#define CpuFeatureAVX512VBMI2 (Q_UINT64_C(1) << 29)
#define QT_FUNCTION_TARGET_STRING_AVX512VBMI2 "avx512vbmi2"
#define CpuFeatureGFNI (Q_UINT64_C(1) << 30)
#define cpu_feature_avx512vbmi (UINT64_C(1) << 23)
#define QT_FUNCTION_TARGET_STRING_AVX512VBMI "avx512vbmi,avx512f"
#define cpu_feature_avx512vbmi2 (UINT64_C(1) << 24)
#define QT_FUNCTION_TARGET_STRING_AVX512VBMI2 "avx512vbmi2,avx512f"
#define cpu_feature_shstk (UINT64_C(1) << 25)
#define QT_FUNCTION_TARGET_STRING_SHSTK "shstk"
#define cpu_feature_gfni (UINT64_C(1) << 26)
#define QT_FUNCTION_TARGET_STRING_GFNI "gfni"
#define CpuFeatureVAES (Q_UINT64_C(1) << 31)
#define QT_FUNCTION_TARGET_STRING_VAES "vaes"
#define CpuFeatureAVX512VNNI (Q_UINT64_C(1) << 32)
#define QT_FUNCTION_TARGET_STRING_AVX512VNNI "avx512vnni"
#define CpuFeatureAVX512BITALG (Q_UINT64_C(1) << 33)
#define QT_FUNCTION_TARGET_STRING_AVX512BITALG "avx512bitalg"
#define CpuFeatureAVX512VPOPCNTDQ (Q_UINT64_C(1) << 34)
#define QT_FUNCTION_TARGET_STRING_AVX512VPOPCNTDQ "avx512vpopcntdq"
#define cpu_feature_vaes (UINT64_C(1) << 27)
#define QT_FUNCTION_TARGET_STRING_VAES "vaes,avx2,avx,aes"
#define cpu_feature_avx512vnni (UINT64_C(1) << 28)
#define QT_FUNCTION_TARGET_STRING_AVX512VNNI "avx512vnni,avx512f"
#define cpu_feature_avx512bitalg (UINT64_C(1) << 29)
#define QT_FUNCTION_TARGET_STRING_AVX512BITALG "avx512bitalg,avx512f"
#define cpu_feature_avx512vpopcntdq (UINT64_C(1) << 30)
#define QT_FUNCTION_TARGET_STRING_AVX512VPOPCNTDQ "avx512vpopcntdq,avx512f"
// in CPUID Leaf 7, Sub-leaf 0, EDX:
#define CpuFeatureAVX5124NNIW (Q_UINT64_C(1) << 35)
#define QT_FUNCTION_TARGET_STRING_AVX5124NNIW "avx5124nniw"
#define CpuFeatureAVX5124FMAPS (Q_UINT64_C(1) << 36)
#define QT_FUNCTION_TARGET_STRING_AVX5124FMAPS "avx5124fmaps"
#define cpu_feature_hybrid (UINT64_C(1) << 31)
#define QT_FUNCTION_TARGET_STRING_HYBRID "hybrid"
#define cpu_feature_ibt (UINT64_C(1) << 32)
#define QT_FUNCTION_TARGET_STRING_IBT "ibt"
#define cpu_feature_avx512fp16 (UINT64_C(1) << 33)
#define QT_FUNCTION_TARGET_STRING_AVX512FP16 "avx512fp16,avx512f,f16c"
static const quint64 qCompilerCpuFeatures = 0
// CPU architectures
// Each macro below is the feature mask of one architecture: the mask of the
// architecture it is based on, ORed with the cpu_feature_* bits it adds.
// A macro that only names its base (for example cpu_wsm) adds no bits.
#define cpu_x86_64 (0 \
| cpu_feature_sse2)
#define cpu_core2 (cpu_x86_64 \
| cpu_feature_sse3 \
| cpu_feature_ssse3)
#define cpu_nhm (cpu_core2 \
| cpu_feature_sse4_1 \
| cpu_feature_sse4_2 \
| cpu_feature_popcnt)
#define cpu_wsm (cpu_nhm)
#define cpu_snb (cpu_wsm \
| cpu_feature_avx)
#define cpu_ivb (cpu_snb \
| cpu_feature_f16c \
| cpu_feature_rdrnd)
#define cpu_hsw (cpu_ivb \
| cpu_feature_avx2 \
| cpu_feature_fma \
| cpu_feature_bmi \
| cpu_feature_bmi2 \
| cpu_feature_movbe)
#define cpu_bdw (cpu_hsw \
| cpu_feature_rdseed)
#define cpu_bdx (cpu_bdw)
#define cpu_skl (cpu_bdw)
// cpu_adl extends cpu_skl directly and therefore carries no AVX512 bits.
#define cpu_adl (cpu_skl \
| cpu_feature_gfni \
| cpu_feature_vaes \
| cpu_feature_shstk \
| cpu_feature_ibt)
// cpu_skx is the first mask with AVX512 bits; the masks through cpu_spr
// derive from it.
#define cpu_skx (cpu_skl \
| cpu_feature_avx512f \
| cpu_feature_avx512dq \
| cpu_feature_avx512cd \
| cpu_feature_avx512bw \
| cpu_feature_avx512vl)
#define cpu_clx (cpu_skx \
| cpu_feature_avx512vnni)
#define cpu_cpx (cpu_clx)
#define cpu_cnl (cpu_skx \
| cpu_feature_avx512ifma \
| cpu_feature_avx512vbmi)
#define cpu_icl (cpu_cnl \
| cpu_feature_avx512vbmi2 \
| cpu_feature_gfni \
| cpu_feature_vaes \
| cpu_feature_avx512vnni \
| cpu_feature_avx512bitalg \
| cpu_feature_avx512vpopcntdq)
#define cpu_icx (cpu_icl)
#define cpu_tgl (cpu_icl \
| cpu_feature_shstk \
| cpu_feature_ibt)
#define cpu_spr (cpu_tgl)
// The remaining masks branch off cpu_wsm and contain no AVX bits.
#define cpu_slm (cpu_wsm \
| cpu_feature_rdrnd \
| cpu_feature_movbe)
#define cpu_glm (cpu_slm \
| cpu_feature_rdseed)
#define cpu_tnt (cpu_glm \
| cpu_feature_gfni)
// Long-form architecture names. Each one expands to the short-form macro it
// names and adds no feature bits of its own.
#define cpu_nehalem (cpu_nhm)
#define cpu_westmere (cpu_wsm)
#define cpu_sandybridge (cpu_snb)
#define cpu_ivybridge (cpu_ivb)
#define cpu_haswell (cpu_hsw)
#define cpu_broadwell (cpu_bdw)
#define cpu_skylake (cpu_skl)
#define cpu_skylake_avx512 (cpu_skx)
#define cpu_cascadelake (cpu_clx)
#define cpu_cooperlake (cpu_cpx)
#define cpu_cannonlake (cpu_cnl)
#define cpu_icelake_client (cpu_icl)
#define cpu_icelake_server (cpu_icx)
#define cpu_alderlake (cpu_adl)
#define cpu_sapphirerapids (cpu_spr)
#define cpu_tigerlake (cpu_tgl)
#define cpu_silvermont (cpu_slm)
#define cpu_goldmont (cpu_glm)
#define cpu_tremont (cpu_tnt)
static const uint64_t _compilerCpuFeatures = 0
#ifdef __SSE2__
| CpuFeatureSSE2
| cpu_feature_sse2
#endif
#ifdef __SSE3__
| CpuFeatureSSE3
| cpu_feature_sse3
#endif
#ifdef __SSSE3__
| CpuFeatureSSSE3
| cpu_feature_ssse3
#endif
#ifdef __FMA__
| CpuFeatureFMA
| cpu_feature_fma
#endif
#ifdef __SSE4_1__
| CpuFeatureSSE4_1
| cpu_feature_sse4_1
#endif
#ifdef __SSE4_2__
| CpuFeatureSSE4_2
| cpu_feature_sse4_2
#endif
#ifdef __MOVBE__
| CpuFeatureMOVBE
| cpu_feature_movbe
#endif
#ifdef __POPCNT__
| CpuFeaturePOPCNT
| cpu_feature_popcnt
#endif
#ifdef __AES__
| CpuFeatureAES
| cpu_feature_aes
#endif
#ifdef __AVX__
| CpuFeatureAVX
| cpu_feature_avx
#endif
#ifdef __F16C__
| CpuFeatureF16C
| cpu_feature_f16c
#endif
#ifdef __RDRND__
| CpuFeatureRDRND
| cpu_feature_rdrnd
#endif
#ifdef __BMI__
| CpuFeatureBMI
#endif
#ifdef __HLE__
| CpuFeatureHLE
| cpu_feature_bmi
#endif
#ifdef __AVX2__
| CpuFeatureAVX2
| cpu_feature_avx2
#endif
#ifdef __BMI2__
| CpuFeatureBMI2
#endif
#ifdef __RTM__
| CpuFeatureRTM
| cpu_feature_bmi2
#endif
#ifdef __AVX512F__
| CpuFeatureAVX512F
| cpu_feature_avx512f
#endif
#ifdef __AVX512DQ__
| CpuFeatureAVX512DQ
| cpu_feature_avx512dq
#endif
#ifdef __RDSEED__
| CpuFeatureRDSEED
| cpu_feature_rdseed
#endif
#ifdef __AVX512IFMA__
| CpuFeatureAVX512IFMA
#endif
#ifdef __AVX512PF__
| CpuFeatureAVX512PF
#endif
#ifdef __AVX512ER__
| CpuFeatureAVX512ER
| cpu_feature_avx512ifma
#endif
#ifdef __AVX512CD__
| CpuFeatureAVX512CD
| cpu_feature_avx512cd
#endif
#ifdef __SHA__
| CpuFeatureSHA
| cpu_feature_sha
#endif
#ifdef __AVX512BW__
| CpuFeatureAVX512BW
| cpu_feature_avx512bw
#endif
#ifdef __AVX512VL__
| CpuFeatureAVX512VL
| cpu_feature_avx512vl
#endif
#ifdef __AVX512VBMI__
| CpuFeatureAVX512VBMI
| cpu_feature_avx512vbmi
#endif
#ifdef __AVX512VBMI2__
| CpuFeatureAVX512VBMI2
| cpu_feature_avx512vbmi2
#endif
#ifdef __SHSTK__
| cpu_feature_shstk
#endif
#ifdef __GFNI__
| CpuFeatureGFNI
| cpu_feature_gfni
#endif
#ifdef __VAES__
| CpuFeatureVAES
| cpu_feature_vaes
#endif
#ifdef __AVX512VNNI__
| CpuFeatureAVX512VNNI
| cpu_feature_avx512vnni
#endif
#ifdef __AVX512BITALG__
| CpuFeatureAVX512BITALG
| cpu_feature_avx512bitalg
#endif
#ifdef __AVX512VPOPCNTDQ__
| CpuFeatureAVX512VPOPCNTDQ
| cpu_feature_avx512vpopcntdq
#endif
#ifdef __AVX5124NNIW__
| CpuFeatureAVX5124NNIW
#ifdef __HYBRID__
| cpu_feature_hybrid
#endif
#ifdef __AVX5124FMAPS__
| CpuFeatureAVX5124FMAPS
#ifdef __IBT__
| cpu_feature_ibt
#endif
#ifdef __AVX512FP16__
| cpu_feature_avx512fp16
#endif
;
QT_END_NAMESPACE
#if (defined __cplusplus) && __cplusplus >= 201103L
/// CPU feature flags as a 64-bit enumeration. Every enumerator takes the value
/// of the cpu_feature_* macro with the matching name.
enum X86CpuFeatures : uint64_t {
CpuFeatureSSE2 = cpu_feature_sse2, ///< Streaming SIMD Extensions 2
CpuFeatureSSE3 = cpu_feature_sse3, ///< Streaming SIMD Extensions 3
CpuFeatureSSSE3 = cpu_feature_ssse3, ///< Supplemental Streaming SIMD Extensions 3
CpuFeatureFMA = cpu_feature_fma, ///< Fused Multiply-Add
CpuFeatureSSE4_1 = cpu_feature_sse4_1, ///< Streaming SIMD Extensions 4.1
CpuFeatureSSE4_2 = cpu_feature_sse4_2, ///< Streaming SIMD Extensions 4.2
CpuFeatureMOVBE = cpu_feature_movbe, ///< MOV Big Endian
CpuFeaturePOPCNT = cpu_feature_popcnt, ///< Population count
CpuFeatureAES = cpu_feature_aes, ///< Advanced Encryption Standard
CpuFeatureAVX = cpu_feature_avx, ///< Advanced Vector Extensions
CpuFeatureF16C = cpu_feature_f16c, ///< 16-bit Float Conversion
CpuFeatureRDRND = cpu_feature_rdrnd, ///< Random number generator
CpuFeatureBMI = cpu_feature_bmi, ///< Bit Manipulation Instructions
CpuFeatureAVX2 = cpu_feature_avx2, ///< Advanced Vector Extensions 2
CpuFeatureBMI2 = cpu_feature_bmi2, ///< Bit Manipulation Instructions 2
CpuFeatureAVX512F = cpu_feature_avx512f, ///< AVX512 Foundation
CpuFeatureAVX512DQ = cpu_feature_avx512dq, ///< AVX512 Double & Quadword
CpuFeatureRDSEED = cpu_feature_rdseed, ///< Random number generator for seeding
CpuFeatureAVX512IFMA = cpu_feature_avx512ifma, ///< AVX512 Integer Fused Multiply-Add
CpuFeatureAVX512CD = cpu_feature_avx512cd, ///< AVX512 Conflict Detection
CpuFeatureSHA = cpu_feature_sha, ///< SHA-1 and SHA-256 instructions
CpuFeatureAVX512BW = cpu_feature_avx512bw, ///< AVX512 Byte & Word
CpuFeatureAVX512VL = cpu_feature_avx512vl, ///< AVX512 Vector Length
CpuFeatureAVX512VBMI = cpu_feature_avx512vbmi, ///< AVX512 Vector Byte Manipulation Instructions
CpuFeatureAVX512VBMI2 = cpu_feature_avx512vbmi2, ///< AVX512 Vector Byte Manipulation Instructions 2
CpuFeatureSHSTK = cpu_feature_shstk, ///< Control Flow Enforcement Technology Shadow Stack
CpuFeatureGFNI = cpu_feature_gfni, ///< Galois Field new instructions
CpuFeatureVAES = cpu_feature_vaes, ///< 256- and 512-bit AES
CpuFeatureAVX512VNNI = cpu_feature_avx512vnni, ///< AVX512 Vector Neural Network Instructions
CpuFeatureAVX512BITALG = cpu_feature_avx512bitalg, ///< AVX512 Bit Algorithms
CpuFeatureAVX512VPOPCNTDQ = cpu_feature_avx512vpopcntdq, ///< AVX512 Population Count
CpuFeatureHYBRID = cpu_feature_hybrid, ///< Hybrid processor
CpuFeatureIBT = cpu_feature_ibt, ///< Control Flow Enforcement Technology Indirect Branch Tracking
CpuFeatureAVX512FP16 = cpu_feature_avx512fp16, ///< AVX512 16-bit Floating Point
}; // enum X86CpuFeatures
#endif // QSIMD_X86_P_H
/// Architecture feature masks as a 64-bit enumeration. Every enumerator takes
/// the value of the corresponding cpu_* macro; the abbreviated names come
/// first, followed by the spelled-out names.
enum X86CpuArchitectures : uint64_t {
CpuArchx8664 = cpu_x86_64,
CpuArchCore2 = cpu_core2,
CpuArchNHM = cpu_nhm,
CpuArchWSM = cpu_wsm,
CpuArchSNB = cpu_snb,
CpuArchIVB = cpu_ivb,
CpuArchHSW = cpu_hsw,
CpuArchBDW = cpu_bdw,
CpuArchBDX = cpu_bdx,
CpuArchSKL = cpu_skl,
CpuArchADL = cpu_adl,
CpuArchSKX = cpu_skx,
CpuArchCLX = cpu_clx,
CpuArchCPX = cpu_cpx,
CpuArchCNL = cpu_cnl,
CpuArchICL = cpu_icl,
CpuArchICX = cpu_icx,
CpuArchTGL = cpu_tgl,
CpuArchSPR = cpu_spr,
CpuArchSLM = cpu_slm,
CpuArchGLM = cpu_glm,
CpuArchTNT = cpu_tnt,
CpuArchNehalem = cpu_nehalem, ///< Intel Core i3/i5/i7
CpuArchWestmere = cpu_westmere, ///< Intel Core i3/i5/i7
CpuArchSandyBridge = cpu_sandybridge, ///< Second Generation Intel Core i3/i5/i7
CpuArchIvyBridge = cpu_ivybridge, ///< Third Generation Intel Core i3/i5/i7
CpuArchHaswell = cpu_haswell, ///< Fourth Generation Intel Core i3/i5/i7
CpuArchBroadwell = cpu_broadwell, ///< Fifth Generation Intel Core i3/i5/i7
CpuArchSkylake = cpu_skylake, ///< Sixth Generation Intel Core i3/i5/i7
CpuArchSkylakeAvx512 = cpu_skylake_avx512, ///< Intel Xeon Scalable
CpuArchCascadeLake = cpu_cascadelake, ///< Second Generation Intel Xeon Scalable
CpuArchCooperLake = cpu_cooperlake, ///< Third Generation Intel Xeon Scalable
CpuArchCannonLake = cpu_cannonlake, ///< Intel Core i3-8121U
CpuArchIceLakeClient = cpu_icelake_client, ///< Tenth Generation Intel Core i3/i5/i7
CpuArchIceLakeServer = cpu_icelake_server, ///< Third Generation Intel Xeon Scalable
CpuArchAlderLake = cpu_alderlake,
CpuArchSapphireRapids = cpu_sapphirerapids,
CpuArchTigerLake = cpu_tigerlake, ///< Eleventh Generation Intel Core i3/i5/i7
CpuArchSilvermont = cpu_silvermont,
CpuArchGoldmont = cpu_goldmont,
CpuArchTremont = cpu_tremont,
}; // enum X86CpuArchitectures
#endif /* C++11 */
#endif /* QSIMD_X86_P_H */

1
util/x86simdgen/.gitignore vendored Normal file
View File

@ -0,0 +1 @@
qsimd_x86_p.h

35
util/x86simdgen/3rdparty/simd-amd.conf vendored Normal file
View File

@ -0,0 +1,35 @@
# -*- mode: conf; indent-tabs-mode: t -*-
# Feature CPUID function Bit Required feature
#mmxext Leaf80000001hEDX 22 # AMD extensions to MMX
#rdtscp Leaf80000001hEDX 27 # RDTSCP instruction
#3dnow Leaf80000001hEDX 31 # 3DNow! instructions
#3dnowext Leaf80000001hEDX 30 # AMD extensions to 3DNow!
lzcnt Leaf80000001hECX 5 # Leading Zero Count
sse4a Leaf80000001hECX 6 # SSE4a
xop Leaf80000001hECX 11 # eXtended Operations
fma4 Leaf80000001hECX 16 # 4-operand Fused Multiply-Add
tbm Leaf80000001hECX 21 # Trailing Bit Manipulation
clzero Leaf80000008hEBX 0 # Cacheline clear and write zero
wbnoinvd Leaf80000008hEBX 9 # Write Back with No Invalidate
# Processor/arch listing below this line
# Source: GCC gcc/common/config/i386/i386-common.c
# Source: Wikipedia
# Architecture Based on New features
arch=AmdFam10h x86_64 sse3,sse4a,cx16,popcnt,lzcnt # AMD K10
arch=BtVer1 AmdFam10h xsave # AMD Bobcat v1
arch=BtVer2 BtVer1 ssse3,sse4.1,sse4.2,avx,bmi,f16c,movbe,xsaveopt # AMD Bobcat v2
arch=BdVer1 BtVer1 ssse3,sse4.1,sse4.2,avx,xop,fma4
arch=BdVer2 BdVer1 bmi,f16c,fma,tbm
arch=BdVer3 BdVer2 fsgsbase,xsaveopt
arch=BdVer4 BdVer3 avx2,bmi2,rdrnd,movbe
arch=ZnVer1 BdVer4 adx,rdseed,clzero,clflushopt,xsavec,xsaves
arch=ZnVer2 ZnVer1 clwb,wbnoinvd
arch=Barcelona AmdFam10h
arch=Bulldozer BdVer1 # AMD Bulldozer
arch=Piledriver BdVer2 # AMD Bulldozer v2 (Piledriver)
arch=Steamroller BdVer3 # AMD Bulldozer v3 (Steamroller)
arch=Excavator BdVer4 # AMD Bulldozer v4 (Excavator)
arch=Zen ZnVer1 # AMD Zen
arch=Zen2 ZnVer2 # AMD Zen2

179
util/x86simdgen/3rdparty/simd-intel.conf vendored Normal file
View File

@ -0,0 +1,179 @@
# -*- mode: conf; indent-tabs-mode: t -*-
# Feature CPUID function Bit Required feature
#sep Leaf01EDX 11 # Sysenter/sysexit
#cmov Leaf01EDX 15 # Conditional Move
#clflush Leaf01EDX 19 # Cache-Line Flush
#mmx Leaf01EDX 23 # Multi Media Extensions
#fxsr Leaf01EDX 24 # FXSAVE instruction
#sse Leaf01EDX 25 # Streaming SIMD Extensions
sse2 Leaf01EDX 26 # Streaming SIMD Extensions 2
# -- everything above this line is mandatory on x86-64 --
sse3 Leaf01ECX 0 # Streaming SIMD Extensions 3
#pclmul Leaf01ECX 1 # Carryless Multiply
ssse3 Leaf01ECX 9 # Supplemental Streaming SIMD Extensions 3
fma Leaf01ECX 12 # Fused Multiply-Add
#cx16 Leaf01ECX 13 # Compare-Exchange 16 bytes
sse4.1 Leaf01ECX 19 # Streaming SIMD Extensions 4.1
sse4.2 Leaf01ECX 20 # Streaming SIMD Extensions 4.2
movbe Leaf01ECX 22 # MOV Big Endian
popcnt Leaf01ECX 23 # Population count
aes Leaf01ECX 25 sse4.2 # Advanced Encryption Standard
#xsave Leaf01ECX 26 # XSAVE, XGETBV instructions
#osxsave Leaf01ECX 27 # XSAVE enabled by OS
avx Leaf01ECX 28 # Advanced Vector Extensions
f16c Leaf01ECX 29 avx # 16-bit Float Conversion
rdrnd Leaf01ECX 30 # Random number generator
#hypervisor Leaf01ECX 31 # Running on a hypervisor
#fsgsbase Leaf07_00EBX 0 # FS/GS base access
bmi Leaf07_00EBX 3 # Bit Manipulation Instructions
#hle Leaf07_00EBX 4 # Hardware Lock Elision
avx2 Leaf07_00EBX 5 avx # Advanced Vector Extensions 2
bmi2 Leaf07_00EBX 8 # Bit Manipulation Instructions 2
#erms Leaf07_00EBX 9 # Enhanced REP MOVSB/STOSB
#rtm Leaf07_00EBX 11 # Restricted Transactional Memory
#rdt_m Leaf07_00EBX 12 # Resource Director Technology (RDT) Monitoring
#mpx Leaf07_00EBX 14 # Memory Protection Extensions
#rdt_a Leaf07_00EBX 15 # Resource Director Technology (RDT) Allocation
avx512f Leaf07_00EBX 16 avx # AVX512 Foundation
avx512dq Leaf07_00EBX 17 avx512f # AVX512 Double & Quadword
rdseed Leaf07_00EBX 18 # Random number generator for seeding
#adx Leaf07_00EBX 19 # Multi-Precision Add-Carry
avx512ifma Leaf07_00EBX 21 avx512f # AVX512 Integer Fused Multiply-Add
#clflushopt Leaf07_00EBX 23 # Cache-Line Flush Optimized
#clwb Leaf07_00EBX 24 # Cache-Line Write Back
#avx512pf Leaf07_00EBX 26 avx512f # AVX512 Prefetch
#avx512er Leaf07_00EBX 27 avx512f # AVX512 Exponential & Reciprocal
avx512cd Leaf07_00EBX 28 avx512f # AVX512 Conflict Detection
sha Leaf07_00EBX 29 # SHA-1 and SHA-256 instructions
avx512bw Leaf07_00EBX 30 avx512f # AVX512 Byte & Word
avx512vl Leaf07_00EBX 31 avx512f # AVX512 Vector Length
avx512vbmi Leaf07_00ECX 1 avx512f # AVX512 Vector Byte Manipulation Instructions
#pku Leaf07_00ECX 3 # Protection Keys for User mode
#ospke Leaf07_00ECX 4 # Protection Keys Enabled by OS
#waitpkg Leaf07_00ECX 5 # User-Level Monitor / Wait
avx512vbmi2 Leaf07_00ECX 6 avx512f # AVX512 Vector Byte Manipulation Instructions 2
shstk Leaf07_00ECX 7 # Control Flow Enforcement Technology Shadow Stack
gfni Leaf07_00ECX 8 # Galois Field new instructions
vaes Leaf07_00ECX 9 avx2,avx,aes # 256- and 512-bit AES
#vpclmulqdq Leaf07_00ECX 10 avx # 256- and 512-bit Carryless Multiply
avx512vnni Leaf07_00ECX 11 avx512f # AVX512 Vector Neural Network Instructions
avx512bitalg Leaf07_00ECX 12 avx512f # AVX512 Bit Algorithms
avx512vpopcntdq Leaf07_00ECX 14 avx512f # AVX512 Population Count
#la57 Leaf07_00ECX 16 # 5-level page tables
#rdpid Leaf07_00ECX 22 # RDPID instruction
#cldemote Leaf07_00ECX 25 # Cache Line Demotion
#movdiri Leaf07_00ECX 27 # Move Direct-store Integer
#movdir64b Leaf07_00ECX 28 # Move Direct-store 64 bytes
#enqcmd Leaf07_00ECX 29 # Enqueue Command
#pks Leaf07_00ECX 31 # Protection Keys for Supervisor mode
#avx5124nniw Leaf07_00EDX 2 avx512f # AVX512 4-iteration Vector Neural Network Instructions
#avx5124fmaps Leaf07_00EDX 3 avx512f # AVX512 4-iteration Fused Multiply Accumulation
#fsrm Leaf07_00EDX 4 # Fast Short REP MOV
#uintr Leaf07_00EDX 5 # User interrupts
#avx512vp2intersect Leaf07_00EDX 8 avx512f # AVX512 Intersection computation
#serialize Leaf07_00EDX 14 # SERIALIZE instruction
hybrid Leaf07_00EDX 15 # Hybrid processor
#tsxldtrk Leaf07_00EDX 16 # TSX (RTM) Suspend Load Address Tracking
#pconfig Leaf07_00EDX 18 # Platform configuration
ibt Leaf07_00EDX 20 # Control Flow Enforcement Technology Indirect Branch Tracking
#amxbf16 Leaf07_00EDX 22 amxtile # AMX Tile multiplication in BFloat16
avx512fp16 Leaf07_00EDX 23 avx512f,f16c # AVX512 16-bit Floating Point
#amxtile Leaf07_00EDX 24 # Advanced Matrix Extensions Tile support
#amxint8 Leaf07_00EDX 25 amxtile # AMX Tile multiplication for Int8
#avxvnni Leaf07_01EAX 4 avx # AVX (VEX-encoded) versions of the Vector Neural Network Instructions
#avx512bf16 Leaf07_01EAX 5 avx512f # AVX512 Brain Float16
#zlmovsb Leaf07_01EAX 10 # Zero-length MOVSB
#fsrs Leaf07_01EAX 11 # Fast Short (REP?) STOSB
#fsrc Leaf07_01EAX 12 # Fast Short (REP?) CMPSB, SCASB
#fred Leaf07_01EAX 17 # Flexible Return and Event Delivery
#lkgs Leaf07_01EAX 18 # Load into Kernel GS
#lam Leaf07_01EAX 26 # Linear Address Masking
#xsaveopt Leaf13_01EAX 0 # Optimized XSAVE
#xsavec Leaf13_01EAX 1 # XSAVE with Compaction
#xgetbv1 Leaf13_01EAX 2 # XGETBV with ECX=1
#xsaves Leaf13_01EAX 3 # XSAVE Supervisor mode
#xfd Leaf13_01EAX 4 # eXtended Feature Disable MSR
#lzcnt Leaf80000001hECX 5 # Leading Zero Count
# XSAVE states
# Source: Intel Software Development Manual, Volume 1, Chapter 13
# Source: Intel Instruction Set Extensions Manual (ed. 041), Chapter 3, "Intel AMX Instruction Set"
# Grouping Value Required for
xsave=X87 0x0001 # X87 and MMX state
xsave=SseState 0x0002 sse # SSE: 128 bits of XMM registers
xsave=Ymm_Hi128 0x0004 # AVX: high 128 bits in YMM registers
xsave=Bndregs 0x0008 # Memory Protection Extensions
xsave=Bndcsr 0x0010 # Memory Protection Extensions
xsave=OpMask 0x0020 # AVX512: k0 through k7
xsave=Zmm_Hi256 0x0040 # AVX512: high 256 bits of ZMM0-15
xsave=Hi16_Zmm 0x0080 # AVX512: all 512 bits of ZMM16-31
xsave=PTState 0x0100 # Processor Trace
xsave=PKRUState 0x0200 pku # Protection Key
# ??? 0x0400
xsave=CetUState 0x0800 # CET: user mode
xsave=CetSState 0x1000 # CET: supervisor mode
xsave=HdcState 0x2000 # Hardware Duty Cycle
xsave=UintrState 0x4000 uintr # User Interrupts
# ??? 0x8000
xsave=HwpState 0x10000 # Hardware P-State
xsave=Xtilecfg 0x20000 # AMX: XTILECFG register
xsave=Xtiledata 0x40000 # AMX: data in the tiles
xsave=AvxState SseState|Ymm_Hi128 avx,fma,avx512f
xsave=MPXState Bndregs|Bndcsr mpx
xsave=Avx512State AvxState|OpMask|Zmm_Hi256|Hi16_Zmm avx512f
xsave=CetState CetUState|CetSState shstk
xsave=AmxState Xtilecfg|Xtiledata amxtile
# Processor/arch listing below this line
# Source: Intel Instruction Set Extension manual, section 1.2
# Source: GCC gcc/config/i386/i386.h, i386-c.c, i386-builtins.c
# Architecture Based on New features Optional features
arch=x86_64 <> sse2
# Core line
arch=Core2 x86_64 sse3,ssse3,cx16
arch=NHM Core2 sse4.1,sse4.2,popcnt
arch=WSM NHM
arch=SNB WSM avx
arch=IVB SNB f16c,rdrnd,fsgsbase
arch=HSW IVB avx2,fma,bmi,bmi2,lzcnt,movbe
arch=BDW HSW adx,rdseed
arch=BDX BDW
arch=SKL BDW xsavec,xsaves
arch=ADL SKL avxvnni,gfni,vaes,vpclmulqdq,serialize,shstk,cldemote,movdiri,movdir64b,ibt,waitpkg,keylocker rdpid
arch=SKX SKL avx512f,avx512dq,avx512cd,avx512bw,avx512vl clwb
arch=CLX SKX avx512vnni
arch=CPX CLX avx512bf16
arch=CNL SKX avx512ifma,avx512vbmi sha
arch=ICL CNL avx512vbmi2,gfni,vaes,vpclmulqdq,avx512vnni,avx512bitalg,avx512vpopcntdq fsrm,rdpid
arch=ICX ICL pconfig
arch=TGL ICL avx512vp2intersect,shstk,movdiri,movdir64b,ibt,keylocker
arch=SPR TGL avx512bf16,amxtile,amxbf16,amxint8,avxvnni,cldemote,pconfig,waitpkg,serialize,tsxldtrk,uintr
# Atom line
arch=SLM WSM rdrnd,movbe
arch=GLM SLM fsgsbase,rdseed,lzcnt,xsavec,xsaves
arch=TNT GLM clwb,gfni,cldemote,waitpkg,movdiri,movdir64b
# Xeon Phi line
#arch=KNL SKL avx512f,avx512er,avx512pf,avx512cd
#arch=KNM KNL avx5124fmaps,avx5124vnniw,avx512vpopcntdq
# Longer names
arch=Nehalem NHM # Intel Core i3/i5/i7
arch=Westmere WSM # Intel Core i3/i5/i7
arch=SandyBridge SNB # Second Generation Intel Core i3/i5/i7
arch=IvyBridge IVB # Third Generation Intel Core i3/i5/i7
arch=Haswell HSW # Fourth Generation Intel Core i3/i5/i7
arch=Broadwell BDW # Fifth Generation Intel Core i3/i5/i7
arch=Skylake SKL # Sixth Generation Intel Core i3/i5/i7
arch=Skylake-Avx512 SKX # Intel Xeon Scalable
arch=CascadeLake CLX # Second Generation Intel Xeon Scalable
arch=CooperLake CPX # Third Generation Intel Xeon Scalable
arch=CannonLake CNL # Intel Core i3-8121U
arch=IceLake-Client ICL # Tenth Generation Intel Core i3/i5/i7
arch=IceLake-Server ICX # Third Generation Intel Xeon Scalable
arch=AlderLake ADL
arch=SapphireRapids SPR
arch=TigerLake TGL # Eleventh Generation Intel Core i3/i5/i7
arch=Silvermont SLM
arch=Goldmont GLM
arch=Tremont TNT
#arch=KnightsLanding KNL
#arch=KnightsMill KNM

329
util/x86simdgen/3rdparty/x86simd_generate.pl vendored Executable file
View File

@ -0,0 +1,329 @@
#!/usr/bin/env perl
# SPDX-License-Identifier: Apache-2.0
use strict;

# print() appends a newline to everything it writes; input is read line by line.
$\ = "\n";
$/ = "\n";

# When set, unknown feature names are reported on STDERR.
my $debug = 0;

# CPUID output registers the generator knows about, each mapped to the
# human-readable description used in the generated "// in ...:" comments.
my %leaves = (
    Leaf01EDX        => "CPUID Leaf 1, EDX",
    Leaf01ECX        => "CPUID Leaf 1, ECX",
    Leaf07_00EBX     => "CPUID Leaf 7, Sub-leaf 0, EBX",
    Leaf07_00ECX     => "CPUID Leaf 7, Sub-leaf 0, ECX",
    Leaf07_00EDX     => "CPUID Leaf 7, Sub-leaf 0, EDX",
    Leaf07_01EAX     => "CPUID Leaf 7, Sub-leaf 1, EAX",
    Leaf13_01EAX     => "CPUID Leaf 13, Sub-leaf 1, EAX",
    Leaf80000001hECX => "CPUID Leaf 80000001h, ECX",
    Leaf80000008hEBX => "CPUID Leaf 80000008h, EBX",
);

# Leaf names in enum order: Leaf01EDX is deliberately placed first (out of
# order), every other leaf follows sorted by name.
my @leafNames = grep { $_ ne "Leaf01EDX" } sort keys %leaves;
unshift @leafNames, "Leaf01EDX";
# Read input from file specified by first argument
my $input_conf_file = shift @ARGV;
die("Usage: $0 <input.conf> [<header-name>]\n")
    unless defined $input_conf_file;
open(my $conf_fh, '<', $input_conf_file)
    or die("Cannot open \"$input_conf_file\": $!");

my $i = 0;                  # number of CPU features accepted so far
my @features;               # CPU features in input order (one bit each)
my @architecture_names;     # architecture names in order of first appearance
my %architectures;          # architecture name => description hash
my @xsaveStates;            # XSAVE state entries in input order
my $maxarchnamelen = 0;     # length of the longest pretty architecture name
while (<$conf_fh>) {
    chomp $_;

    # Everything after the first '#' is kept as the entry's description;
    # the comment and any leading whitespace are then stripped from the line.
    my $comment = m/#\s*(.*?)\s*$/ ? $1 : "";
    s/#.*$//;
    s/^\s+//;
    next if $_ eq "";

    if (s/^arch=//) {
        # Architecture: <name> <base architecture, or "<>" for none> <new features>
        my ($arch, $based, $f) = split /\s+/;
        die("Unknown base architecture \"$based\"")
            unless $based eq "<>" or grep {$_ eq $based} @architecture_names;

        # Identifier: lower-cased name, anything outside [A-Za-z0-9_] becomes '_'
        my $id = lc($arch);
        $id =~ s/[^A-Za-z0-9_]/_/g;

        # Display name: a space before inner capitals, "-Suffix" becomes " (Suffix)"
        my $prettyname = $arch;
        $prettyname =~ s/\B([A-Z])/ $1/g;
        $prettyname =~ s/-(\w+)/ ($1)/g;
        $maxarchnamelen = length($prettyname) if length($prettyname) > $maxarchnamelen;

        # A repeated arch= line for the same name adds to the features
        # recorded by the earlier line instead of replacing them.
        my @basefeatures;
        my @extrafeatures;
        @basefeatures = @{$architectures{$based}->{allfeatures}} if $based ne "<>";
        @extrafeatures = @{$architectures{$arch}{features}} if defined($architectures{$arch});
        @extrafeatures = (@extrafeatures, split(',', $f));
        my @allfeatures = sort (@basefeatures, @extrafeatures);

        $architectures{$arch} = {
            name => $arch,
            prettyname => $prettyname,
            id => $id,
            base => $based,
            features => \@extrafeatures,
            allfeatures => \@allfeatures,
            comment => $comment
        };
        push @architecture_names, $arch
            unless grep {$_ eq $arch} @architecture_names;
    } elsif (s/^xsave=//) {
        # XSAVE state: <name> <hex value or A|B combination> <features needing it>
        my ($name, $value, $required) = split /\s+/;
        push @xsaveStates,
            { id => $name, value => $value, required_for => $required, comment => $comment };
    } else {
        # CPU feature: <name> <CPUID leaf> <bit> <features it depends on>
        my ($name, $function, $bit, $depends) = split /\s+/;
        die("Unknown CPUID function \"$function\"")
            unless grep {$_ eq $function} @leafNames;

        if (my @match = grep { $_->{name} eq $name } @features) {
            die("internal error") if scalar @match != 1;
            # The CPUID leaf is stored under the key "leaf" (see the push
            # below); an exact repeat of an earlier entry is silently skipped.
            next if $match[0]->{leaf} eq $function &&
                $match[0]->{bit} eq $bit && $match[0]->{depends} eq $depends;
            die("Duplicate feature \"$name\" with different details. " .
                "Previously was $match[0]->{leaf} bit $match[0]->{bit}.");
        }

        my $id = uc($name);
        $id =~ s/[^A-Z0-9_]/_/g;
        push @features,
            { name => $name, depends => $depends, id => $id, bit => $bit, leaf => $function, comment => $comment };
        ++$i;
        die("Too many features to fit a 64-bit integer") if $i > 64;
    }
}
close $conf_fh;
# Emit the include-guard prologue when a header name was supplied as the
# second argument; without one, switch to debug mode instead.
my $headername = "";
my $headerguard = "";
$headername = shift @ARGV;
if ($headername) {
    # Guard macro: upper-cased header name, other characters replaced by '_'
    ($headerguard = uc($headername)) =~ s/[^A-Z0-9_]/_/g;
    print qq|// This is a generated file. DO NOT EDIT.
// Please see $0
#ifndef $headerguard
#define $headerguard
#include <stdint.h>|;
} else {
    $debug = 1;
}
# Emit one cpu_feature_* bit macro and one QT_FUNCTION_TARGET_STRING_* macro
# per feature; the bit number is the feature's position in the input.
my $lastleaf;
for my $bitno (0 .. $#features) {
    my $entry = $features[$bitno];
    my $leaf = $entry->{leaf};

    # Start a new "// in <leaf>:" section whenever the CPUID leaf changes
    if ($leaf ne $lastleaf) {
        printf "\n// in %s:\n", $leaves{$leaf};
    }
    $lastleaf = $leaf;

    printf "#define cpu_feature_%-31s (UINT64_C(1) << %d)\n", lc($entry->{id}), $bitno;

    # Target string for Clang and GCC: the feature followed by its
    # dependencies, without a trailing comma when there are none
    my $target = join(',', $entry->{name}, $entry->{depends});
    $target =~ s/,$//;
    printf "#define QT_FUNCTION_TARGET_STRING_%-17s \"%s\"\n",
        $entry->{id}, $target;
}
# Emit one cpu_<arch> macro per architecture: the base architecture's macro
# (or 0 when there is none) OR-ed with the features this architecture adds.
print "\n// CPU architectures";
for my $archname (@architecture_names) {
    my $arch = $architectures{$archname};

    my $parent = "0";
    if ($arch->{base} ne "<>") {
        ($parent = $arch->{base}) =~ s/[^A-Za-z0-9_]/_/g;
        $parent = "cpu_" . $parent;
    }
    printf "#define cpu_%-19s (%s", lc($arch->{id}), lc($parent);

    for my $fname (@{$arch->{features}}) {
        my @known = grep { $_->{name} eq $fname } @features;
        if (scalar @known != 1) {
            # Not an enabled feature: leave it out, reporting it in debug mode
            printf STDERR "%s: unknown feature '%s' for CPU '%s'\n", $0, $fname, $arch->{name}
                if $debug;
            next;
        }
        printf " \\\n%33s| cpu_feature_%s", " ", lc($known[0]->{id});
    }
    print ")";
}
# Emit _compilerCpuFeatures: the OR of every feature whose __<ID>__ macro is
# defined when the generated header is compiled.
print "\nstatic const uint64_t _compilerCpuFeatures = 0";
for my $entry (@features) {
    my $macro = $entry->{id};
    printf "#ifdef __%s__\n | cpu_feature_%s\n#endif\n", $macro, lc($macro);
}
print ' ;';
# When generating a real header, also emit C++11 enums whose enumerators
# alias the cpu_feature_* and cpu_<arch> macros; an entry's description from
# the input file becomes a trailing "///<" comment.
if ($headerguard ne "") {
    print q|
#if (defined __cplusplus) && __cplusplus >= 201103L
enum X86CpuFeatures : uint64_t {|;
    for my $feature (@features) {
        my $line = sprintf "CpuFeature%s = cpu_feature_%s,", $feature->{id}, lc($feature->{id});
        if ($feature->{comment} ne "") {
            printf " %-56s ///< %s\n", $line, $feature->{comment};
        } else {
            print " $line";
        }
    }
    print qq|}; // enum X86CpuFeatures
enum X86CpuArchitectures : uint64_t {|;
    for my $archname (@architecture_names) {
        my $arch = $architectures{$archname};
        # Enumerator suffix: the architecture name reduced to letters and digits
        my $name = $arch->{name};
        $name =~ s/[^A-Za-z0-9]//g;
        my $line = sprintf "CpuArch%s = cpu_%s,", $name, lc($arch->{id});
        if ($arch->{comment} ne "") {
            printf " %-56s ///< %s\n", $line, $arch->{comment};
        } else {
            print " $line";
        }
    }
    # The closing comment now spells the enum name as it is declared above
    print qq|}; // enum X86CpuArchitectures
#endif /* C++11 */\n|;
}
# Everything from this marker on is implementation detail.
print "// -- implementation start --\n";

# Emit the feature names as one string of " name\0" entries, recording the
# offset at which each entry starts.
my $offset = 0;
my @offsets;
print "static const char features_string[] =";
for my $entry (@features) {
    my $fname = $entry->{name};
    push @offsets, $offset;
    print " \" $fname\\0\"";
    # leading space + name + NUL terminator
    $offset += length($fname) + 2;
}
print " \"\\0\";";

# Emit the offset table, eight entries per line, using the narrowest integer
# type that can hold the total string length.
my $index_type = $offset > 255 ? "uint16_t" : "uint8_t";
printf "\nstatic const %s features_indices[] = {", $index_type;
for my $j (0 .. $#offsets) {
    my $separator = ($j % 8) ? " " : "\n ";
    printf "%s%3d,", $separator, $offsets[$j];
}
print "\n};";
# Print the locator enum and table
print "\nenum X86CpuidLeaves {";
print " $_," for @leafNames;
print " X86CpuidMaxLeaf\n};";

# Each locator is "leaf index * 32 + bit", so an 8-bit type is only wide
# enough for up to eight leaves. Count the keys explicitly: a bare hash in
# scalar context is not the key count on Perl versions before 5.26.
my $type = scalar(keys %leaves) > 8 ? "uint16_t" : "uint8_t";
printf "\nstatic const %s x86_locators[] = {\n", $type;
for my $feature (@features) {
    # Pad so that the trailing "// name" comments line up
    printf " %s*32 + %2d, %s// %s\n",
        $feature->{leaf}, $feature->{bit}, ' ' x (24 - length($feature->{leaf})), $feature->{name};
}
print '};';
# Generate the processor name listing, sorted by feature length.
# The key is "<feature count>_<feature list>": architectures with exactly the
# same feature set share a key, so the one listed last in the input wins.
my %sorted_archs;
for my $archname (@architecture_names) {
    my $arch = $architectures{$archname};
    my $key = sprintf "%02d_%s", scalar(@{$arch->{allfeatures}}), join(',', @{$arch->{allfeatures}});
    $sorted_archs{$key} = $arch;
}
print qq|
struct X86Architecture
{
uint64_t features;
char name[$maxarchnamelen + 1];
};
static const struct X86Architecture x86_architectures[] = {|;
# Most features first (the numeric comparison uses the leading count of the
# key). Equal counts are ordered by the key text so that the generated table
# does not depend on Perl's per-run hash ordering.
for my $key (sort { $b <=> $a || $a cmp $b } keys %sorted_archs) {
    my $arch = $sorted_archs{$key};
    # Entries without a base architecture are not listed
    next if $arch->{base} eq "<>";
    printf " { cpu_%s, \"%s\" },\n", $arch->{id}, $arch->{prettyname};
}
print "};";
# Emit enum XSaveBits with one XSave_<id> enumerator per xsave= entry.
print "\nenum XSaveBits {";
my $xsaveEnumPrefix = "XSave_";
for my $state (@xsaveStates) {
    my $value = $state->{value};
    if ($value !~ /^0x/) {
        # Compound value: prefix every '|'-separated component name
        my @parts = map { "$xsaveEnumPrefix$_" } split(/\|/, $value);
        $value = join(" | ", @parts);
    }

    # Optional trailing "// description", padded after short values
    my $note = "";
    $note = sprintf("%s// %s", ' ' x (18 - length($value)), $state->{comment})
        if $state->{comment} ne '';

    printf " %s%-12s = %s,", $xsaveEnumPrefix, $state->{id}, $value;
    printf "%s\n", $note;
}
print "};";
# For every XSAVE state that names the features it is required for, emit an
# XSaveReq_<id> constant OR-ing all matching features, and remember the
# (requirement, state) pair for the mapping table.
my $xsaveRequirementMapping;
for my $state (@xsaveStates) {
    my $xsaveReqPrefix = "XSaveReq_";
    my @required_for = split /,/, $state->{required_for};
    next unless scalar @required_for;

    # Declaration text; printed together with the first matching feature and
    # then cleared, so an empty $prefix means "something was printed".
    my $prefix = sprintf "\n// List of features requiring %s%s\nstatic const uint64_t %s%s = 0",
        $xsaveEnumPrefix, $state->{id}, $xsaveReqPrefix, $state->{id};

    for my $feature (@features) {
        my $id = lc($feature->{id});
        # A feature matches through its own name or any of its dependencies
        my @depends = split /,/, "$id," . $feature->{depends};

        my $required = 0;
        for my $requirement (@required_for) {
            next unless grep { $_ eq $requirement } @depends;
            $required = 1;
            last;
        }
        next unless $required;

        printf "$prefix\n | cpu_feature_%s", $id;
        $prefix = "";
    }

    if ($prefix eq "") {
        # we printed something
        print ";";
        $xsaveRequirementMapping .= sprintf " { %s%s, %s%s },\n",
            $xsaveReqPrefix, $state->{id}, $xsaveEnumPrefix, $state->{id};
    }
}
# Finally, make a table mapping each set of features to the XSAVE state it
# requires. The table is skipped when no requirement list was produced.
if ($xsaveRequirementMapping ne "") {
    printf qq|
struct XSaveRequirementMapping
{
uint64_t cpu_features;
uint64_t xsave_state;
};
static const struct XSaveRequirementMapping xsave_requirements[] = {
%s};\n|, $xsaveRequirementMapping;
}

# The end marker always pairs with the "implementation start" marker, and the
# include guard is closed exactly when one was opened.
print "// -- implementation end --";
print "#endif /* $headerguard */" if $headerguard ne "";

19
util/x86simdgen/Makefile Normal file
View File

@ -0,0 +1,19 @@
# Regenerates the x86 SIMD feature header and source from the .conf lists.
GENERATOR = 3rdparty/x86simd_generate.pl
TARGETDIR = ../../src/corelib/global/
TARGETCPP = qsimd_x86.cpp
TARGETHEADER = qsimd_x86_p.h
CONF_FILES = 3rdparty/simd-intel.conf
# We don't currently use any feature from simd-amd.conf
# CONF_FILES += 3rdparty/simd-amd.conf

# "all" and "clean" name actions, not files: never treat a file of the same
# name as an up-to-date target.
.PHONY: all clean

# If a recipe fails, delete its half-written target so the next run does not
# mistake a truncated file for an up-to-date one.
.DELETE_ON_ERROR:

all: $(TARGETDIR)/$(TARGETHEADER) $(TARGETDIR)/$(TARGETCPP)

# Complete generator output (declarations plus implementation). The generator
# is an order-only prerequisite so that $^ expands to the .conf files only.
$(TARGETHEADER): $(CONF_FILES) | $(GENERATOR)
	cat $^ | perl $(GENERATOR) /dev/stdin $@ > $@

# Installed header: licence header plus the output without its implementation part.
$(TARGETDIR)/$(TARGETHEADER): header $(TARGETHEADER)
	sed '/-- implementation start --/,/-- implementation end --/d' $^ > $@

# Installed source: licence header, the #include, then only the implementation part.
$(TARGETDIR)/$(TARGETCPP): $(TARGETHEADER) header
	(cat header; echo '#include "$(TARGETHEADER)"'; sed '1,/-- implementation start --/d;/-- implementation end --/,$$d' $<) > $@

clean:
	-$(RM) $(TARGETHEADER)

13
util/x86simdgen/README.md Normal file
View File

@ -0,0 +1,13 @@
# Scripts to regenerate the x86 SIMD flags
Upstream: https://github.com/opendcdiag/opendcdiag
License: Apache-2.0
The .conf files are meant to be edited and the options we want to use
are uncommented.
To regenerate:
make
Note: the license of the script does not affect the produced output's
license. Therefore, no qt_attribution.json file is provided.

View File

@ -1,277 +0,0 @@
#!/usr/bin/env perl
#############################################################################
##
## Copyright (C) 2018 Intel Corporation.
## Contact: https://www.qt.io/licensing/
##
## This file is part of the build configuration tools of the Qt Toolkit.
##
## $QT_BEGIN_LICENSE:MIT$
## Permission is hereby granted, free of charge, to any person obtaining a copy
## of this software and associated documentation files (the "Software"), to deal
## in the Software without restriction, including without limitation the rights
## to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
## copies of the Software, and to permit persons to whom the Software is
## furnished to do so, subject to the following conditions:
##
## The above copyright notice and this permission notice shall be included in
## all copies or substantial portions of the Software.
##
## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
## AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
## LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
## OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
## THE SOFTWARE.
## $QT_END_LICENSE$
##
#############################################################################
use strict;

# print() appends a newline to everything it writes; input is read line by line.
$\ = "\n";
$/ = "\n";

# CPUID output registers known to the generator, with the description used
# in the generated "// in ...:" comments.
my %leaves = (
    Leaf1EDX        => "CPUID Leaf 1, EDX",
    Leaf1ECX        => "CPUID Leaf 1, ECX",
    Leaf7_0EBX      => "CPUID Leaf 7, Sub-leaf 0, EBX",
    Leaf7_0ECX      => "CPUID Leaf 7, Sub-leaf 0, ECX",
    Leaf7_0EDX      => "CPUID Leaf 7, Sub-leaf 0, EDX",
);
my @leafNames = sort keys %leaves;

# Read data from stdin: <name> <CPUID leaf> <bit> [<required feature>]
my $i = 1;
my @features;
while (<STDIN>) {
    s/#.*$//;
    chomp;
    s/^\s+//;
    # Skip lines that hold nothing but a comment and/or whitespace
    next unless /\S/;

    my ($name, $function, $bit, $depends) = split /\s+/;
    # Compare against each known leaf name; a bare "grep $function, ..."
    # only tests that $function is true and never rejects anything.
    die("Unknown CPUID function \"$function\"")
        unless grep { $_ eq $function } @leafNames;

    # Identifier: upper-cased name, other characters replaced by '_'
    my $id = uc($name);
    $id =~ s/[^A-Z0-9_]/_/g;
    push @features,
        { name => $name, depends => $depends, id => $id, bit => $bit, leaf => $function };
    ++$i;
}
# With a first argument, send the header part to that file instead of stdout.
if (my $h = shift @ARGV) {
    # Fail loudly instead of silently printing to an unopened handle
    open(HEADER, ">", $h)
        or die("Cannot open \"$h\" for writing: $!");
    select HEADER;
}
# Print the qsimd_x86_p.h output: licence, include guards and the
# QSimdInitialized bit. The "Please see" line no longer ends in a stray '";'.
print q{/****************************************************************************
**
** Copyright (C) 2018 Intel Corporation.
** Contact: https://www.qt.io/licensing/
**
** This file is part of the QtCore module of the Qt Toolkit.
**
** $QT_BEGIN_LICENSE:LGPL$
** Commercial License Usage
** Licensees holding valid commercial Qt licenses may use this file in
** accordance with the commercial license agreement provided with the
** Software or, alternatively, in accordance with the terms contained in
** a written agreement between you and The Qt Company. For licensing terms
** and conditions see https://www.qt.io/terms-conditions. For further
** information use the contact form at https://www.qt.io/contact-us.
**
** GNU Lesser General Public License Usage
** Alternatively, this file may be used under the terms of the GNU Lesser
** General Public License version 3 as published by the Free Software
** Foundation and appearing in the file LICENSE.LGPL3 included in the
** packaging of this file. Please review the following information to
** ensure the GNU Lesser General Public License version 3 requirements
** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
**
** GNU General Public License Usage
** Alternatively, this file may be used under the terms of the GNU
** General Public License version 2.0 or (at your option) the GNU General
** Public license version 3 or any later version approved by the KDE Free
** Qt Foundation. The licenses are as published by the Free Software
** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
** included in the packaging of this file. Please review the following
** information to ensure the GNU General Public License requirements will
** be met: https://www.gnu.org/licenses/gpl-2.0.html and
** https://www.gnu.org/licenses/gpl-3.0.html.
**
** $QT_END_LICENSE$
**
****************************************************************************/
// This is a generated file. DO NOT EDIT.
// Please see util/x86simdgen/generate.pl
#ifndef QSIMD_P_H
# error "Please include <private/qsimd_p.h> instead"
#endif
#ifndef QSIMD_X86_P_H
#define QSIMD_X86_P_H
#include "qsimd_p.h"
//
// W A R N I N G
// -------------
//
// This file is not part of the Qt API. It exists purely as an
// implementation detail. This header file may change from version to
// version without notice, or even be removed.
//
// We mean it.
//
QT_BEGIN_NAMESPACE
// used only to indicate that the CPU detection was initialized
#define QSimdInitialized (Q_UINT64_C(1) << 0)};
# Emit one CpuFeature* bit macro and one QT_FUNCTION_TARGET_STRING_* macro
# per feature. Bit 0 is taken by QSimdInitialized, so features start at bit 1.
my $lastleaf;
for my $index (0 .. $#features) {
    my $entry = $features[$index];
    my $leaf = $entry->{leaf};

    # Start a new "// in <leaf>:" section whenever the CPUID leaf changes
    if ($leaf ne $lastleaf) {
        printf "\n// in %s:\n", $leaves{$leaf};
    }
    $lastleaf = $leaf;

    printf "#define CpuFeature%-33s (Q_UINT64_C(1) << %d)\n", $entry->{id}, $index + 1;

    # Target string for Clang and GCC: the feature plus its required feature, if any
    my $target = $entry->{name};
    if (defined($entry->{depends})) {
        $target .= ",$entry->{depends}";
    }
    printf "#define QT_FUNCTION_TARGET_STRING_%-17s \"%s\"\n",
        $entry->{id}, $target;
}
# Emit qCompilerCpuFeatures: the OR of every feature whose __<ID>__ macro is
# defined at compile time, then close the namespace and the include guard.
print "\nstatic const quint64 qCompilerCpuFeatures = 0";
for my $entry (@features) {
    my $macro = $entry->{id};
    printf "#ifdef __%s__\n | CpuFeature%s\n#endif\n", $macro, $macro;
}
print " ;\nQT_END_NAMESPACE\n#endif // QSIMD_X86_P_H\n";
# With a second argument, send the .cpp part to that file; otherwise print a
# cut marker and keep writing to the current output.
if (my $cpp = shift @ARGV) {
    # Fail loudly instead of silently printing to an unopened handle
    open(CPP, ">", $cpp)
        or die("Cannot open \"$cpp\" for writing: $!");
    select CPP;
} else {
    print q{
---- cut here, paste the rest into qsimd_x86.cpp ---
};
}
# Licence and include line of the generated .cpp part. The "Please see" line
# no longer ends in a stray '";'.
print q{/****************************************************************************
**
** Copyright (C) 2018 Intel Corporation.
** Contact: https://www.qt.io/licensing/
**
** This file is part of the QtCore module of the Qt Toolkit.
**
** $QT_BEGIN_LICENSE:LGPL$
** Commercial License Usage
** Licensees holding valid commercial Qt licenses may use this file in
** accordance with the commercial license agreement provided with the
** Software or, alternatively, in accordance with the terms contained in
** a written agreement between you and The Qt Company. For licensing terms
** and conditions see https://www.qt.io/terms-conditions. For further
** information use the contact form at https://www.qt.io/contact-us.
**
** GNU Lesser General Public License Usage
** Alternatively, this file may be used under the terms of the GNU Lesser
** General Public License version 3 as published by the Free Software
** Foundation and appearing in the file LICENSE.LGPL3 included in the
** packaging of this file. Please review the following information to
** ensure the GNU Lesser General Public License version 3 requirements
** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
**
** GNU General Public License Usage
** Alternatively, this file may be used under the terms of the GNU
** General Public License version 2.0 or (at your option) the GNU General
** Public license version 3 or any later version approved by the KDE Free
** Qt Foundation. The licenses are as published by the Free Software
** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
** included in the packaging of this file. Please review the following
** information to ensure the GNU General Public License requirements will
** be met: https://www.gnu.org/licenses/gpl-2.0.html and
** https://www.gnu.org/licenses/gpl-3.0.html.
**
** $QT_END_LICENSE$
**
****************************************************************************/
// This is a generated file. DO NOT EDIT.
// Please see util/x86simdgen/generate.pl
#include "qsimd_p.h"
};
# Emit the feature names as one string of " name\0" entries, recording the
# offset at which each entry starts.
my $offset = 0;
my @offsets;
print "static const char features_string[] =";
for my $entry (@features) {
    my $fname = $entry->{name};
    push @offsets, $offset;
    print " \" $fname\\0\"";
    # leading space + name + NUL terminator
    $offset += length($fname) + 2;
}
print " \"\\0\";";

# Emit the offset table. Its first element is the total string length; the
# per-feature offsets follow, with a line break after every eighth element.
my $index_type = $offset > 255 ? "quint16" : "quint8";
printf "\nstatic const %s features_indices[] = {\n %3d", $index_type, $offset;
for my $j (0 .. $#offsets) {
    my $separator = (($j + 1) % 8) ? " " : "\n ";
    printf ",%s%3d", $separator, $offsets[$j];
}
print "\n};";
# Print the locator enum and table
print "\nenum X86CpuidLeaves {";
print " $_," for @leafNames;
print " X86CpuidMaxLeaf\n};";

# Each locator is "leaf index * 32 + bit", so an 8-bit type is only wide
# enough for up to eight leaves. Count the keys explicitly: a bare hash in
# scalar context is not the key count on Perl versions before 5.26.
my $type = scalar(keys %leaves) > 8 ? "quint16" : "quint8";
printf "\nstatic const %s x86_locators[] = {", $type;

# Each entry's "// name" comment is printed after the comma that follows it,
# hence the one-iteration delay through $lastname.
my $lastname;
for my $feature (@features) {
    printf ", // %s", $lastname
        if defined($lastname);
    printf "\n %s*32 + %2d",
        $feature->{leaf}, $feature->{bit};
    $lastname = $feature->{name};
}
# The name is passed as an argument rather than interpolated into the format
printf " // %s\n};\n// List of AVX512 features (see detectProcessorFeatures())\nstatic const quint64 AllAVX512 = 0",
    $lastname;

# Print AVX512 features
for my $feature (@features) {
    printf "\n | CpuFeature%s", $feature->{id}
        if $feature->{id} =~ /AVX512/;
}
print ";";

39
util/x86simdgen/header Normal file
View File

@ -0,0 +1,39 @@
/****************************************************************************
**
** Copyright (C) 2022 Intel Corporation.
** Contact: https://www.qt.io/licensing/
**
** This file is part of the QtCore module of the Qt Toolkit.
**
** $QT_BEGIN_LICENSE:LGPL$
** Commercial License Usage
** Licensees holding valid commercial Qt licenses may use this file in
** accordance with the commercial license agreement provided with the
** Software or, alternatively, in accordance with the terms contained in
** a written agreement between you and The Qt Company. For licensing terms
** and conditions see https://www.qt.io/terms-conditions. For further
** information use the contact form at https://www.qt.io/contact-us.
**
** GNU Lesser General Public License Usage
** Alternatively, this file may be used under the terms of the GNU Lesser
** General Public License version 3 as published by the Free Software
** Foundation and appearing in the file LICENSE.LGPL3 included in the
** packaging of this file. Please review the following information to
** ensure the GNU Lesser General Public License version 3 requirements
** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
**
** GNU General Public License Usage
** Alternatively, this file may be used under the terms of the GNU
** General Public License version 2.0 or (at your option) the GNU General
** Public license version 3 or any later version approved by the KDE Free
** Qt Foundation. The licenses are as published by the Free Software
** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
** included in the packaging of this file. Please review the following
** information to ensure the GNU General Public License requirements will
** be met: https://www.gnu.org/licenses/gpl-2.0.html and
** https://www.gnu.org/licenses/gpl-3.0.html.
**
** $QT_END_LICENSE$
**
****************************************************************************/

View File

@ -1,37 +0,0 @@
# Feature CPUID function Bit Required feature
sse2 Leaf1EDX 26
sse3 Leaf1ECX 0
ssse3 Leaf1ECX 9
fma Leaf1ECX 12
sse4.1 Leaf1ECX 19
sse4.2 Leaf1ECX 20
movbe Leaf1ECX 22
popcnt Leaf1ECX 23
aes Leaf1ECX 25 sse4.2
avx Leaf1ECX 28
f16c Leaf1ECX 29
rdrnd Leaf1ECX 30
bmi Leaf7_0EBX 3
hle Leaf7_0EBX 4
avx2 Leaf7_0EBX 5
bmi2 Leaf7_0EBX 8
rtm Leaf7_0EBX 11
avx512f Leaf7_0EBX 16
avx512dq Leaf7_0EBX 17
rdseed Leaf7_0EBX 18
avx512ifma Leaf7_0EBX 21
avx512pf Leaf7_0EBX 26
avx512er Leaf7_0EBX 27
avx512cd Leaf7_0EBX 28
sha Leaf7_0EBX 29
avx512bw Leaf7_0EBX 30
avx512vl Leaf7_0EBX 31
avx512vbmi Leaf7_0ECX 1
avx512vbmi2 Leaf7_0ECX 6
gfni Leaf7_0ECX 8
vaes Leaf7_0ECX 9
avx512vnni Leaf7_0ECX 11
avx512bitalg Leaf7_0ECX 12
avx512vpopcntdq Leaf7_0ECX 14
avx5124nniw Leaf7_0EDX 2
avx5124fmaps Leaf7_0EDX 3