qsimd: update the generator script from OpenDCDiag
I'd been making changes to that and improving it for the past 2 years
without bringing it back into Qt.

The list of features is mostly the same, except:
- removed TSX features
- removed features specific to Xeon Phi processors
- added CET and AVX512FP16 features
- added the bit for hybrid CPU detection

See matching update at
https://github.com/opendcdiag/opendcdiag/pull/49

Change-Id: I6fcda969a9e9427198bffffd16ce860b5a38aece
Reviewed-by: Allan Sandfeld Jensen <allan.jensen@qt.io>
This commit is contained in:
parent
05428d9b97
commit
b852584556
@ -350,22 +350,6 @@ static void xgetbv(uint in, uint &eax, uint &edx)
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
// Flags from the XCR0 state register
|
|
||||||
enum XCR0Flags {
|
|
||||||
X87 = 1 << 0,
|
|
||||||
XMM0_15 = 1 << 1,
|
|
||||||
YMM0_15Hi128 = 1 << 2,
|
|
||||||
BNDRegs = 1 << 3,
|
|
||||||
BNDCSR = 1 << 4,
|
|
||||||
OpMask = 1 << 5,
|
|
||||||
ZMM0_15Hi256 = 1 << 6,
|
|
||||||
ZMM16_31 = 1 << 7,
|
|
||||||
|
|
||||||
SSEState = XMM0_15,
|
|
||||||
AVXState = XMM0_15 | YMM0_15Hi128,
|
|
||||||
AVX512State = AVXState | OpMask | ZMM0_15Hi256 | ZMM16_31
|
|
||||||
};
|
|
||||||
|
|
||||||
QT_FUNCTION_TARGET_BASELINE
|
QT_FUNCTION_TARGET_BASELINE
|
||||||
static quint64 adjustedXcr0(quint64 xcr0)
|
static quint64 adjustedXcr0(quint64 xcr0)
|
||||||
{
|
{
|
||||||
@ -386,7 +370,7 @@ static quint64 adjustedXcr0(quint64 xcr0)
|
|||||||
constexpr quintptr cpu_capabilities64 = commpage + 0x10;
|
constexpr quintptr cpu_capabilities64 = commpage + 0x10;
|
||||||
quint64 capab = *reinterpret_cast<quint64 *>(cpu_capabilities64);
|
quint64 capab = *reinterpret_cast<quint64 *>(cpu_capabilities64);
|
||||||
if (capab & kHasAVX512F)
|
if (capab & kHasAVX512F)
|
||||||
xcr0 |= AVX512State;
|
xcr0 |= XSave_Avx512State;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
return xcr0;
|
return xcr0;
|
||||||
@ -395,9 +379,6 @@ static quint64 adjustedXcr0(quint64 xcr0)
|
|||||||
QT_FUNCTION_TARGET_BASELINE
|
QT_FUNCTION_TARGET_BASELINE
|
||||||
static quint64 detectProcessorFeatures()
|
static quint64 detectProcessorFeatures()
|
||||||
{
|
{
|
||||||
static const quint64 AllAVX = AllAVX512 | CpuFeatureAVX | CpuFeatureAVX2 | CpuFeatureF16C
|
|
||||||
| CpuFeatureFMA | CpuFeatureVAES;
|
|
||||||
|
|
||||||
quint64 features = 0;
|
quint64 features = 0;
|
||||||
int cpuidLevel = maxBasicCpuidSupported();
|
int cpuidLevel = maxBasicCpuidSupported();
|
||||||
#if Q_PROCESSOR_X86 < 5
|
#if Q_PROCESSOR_X86 < 5
|
||||||
@ -408,38 +389,35 @@ static quint64 detectProcessorFeatures()
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
uint results[X86CpuidMaxLeaf] = {};
|
uint results[X86CpuidMaxLeaf] = {};
|
||||||
cpuidFeatures01(results[Leaf1ECX], results[Leaf1EDX]);
|
cpuidFeatures01(results[Leaf01ECX], results[Leaf01EDX]);
|
||||||
if (cpuidLevel >= 7)
|
if (cpuidLevel >= 7)
|
||||||
cpuidFeatures07_00(results[Leaf7_0EBX], results[Leaf7_0ECX], results[Leaf7_0EDX]);
|
cpuidFeatures07_00(results[Leaf07_00EBX], results[Leaf07_00ECX], results[Leaf07_00EDX]);
|
||||||
|
|
||||||
// populate our feature list
|
// populate our feature list
|
||||||
for (uint i = 0; i < sizeof(x86_locators) / sizeof(x86_locators[0]); ++i) {
|
for (uint i = 0; i < std::size(x86_locators); ++i) {
|
||||||
uint word = x86_locators[i] / 32;
|
uint word = x86_locators[i] / 32;
|
||||||
uint bit = 1U << (x86_locators[i] % 32);
|
uint bit = 1U << (x86_locators[i] % 32);
|
||||||
quint64 feature = Q_UINT64_C(1) << (i + 1);
|
quint64 feature = Q_UINT64_C(1) << i;
|
||||||
if (results[word] & bit)
|
if (results[word] & bit)
|
||||||
features |= feature;
|
features |= feature;
|
||||||
}
|
}
|
||||||
|
|
||||||
// now check the AVX state
|
// now check the AVX state
|
||||||
quint64 xcr0 = 0;
|
quint64 xcr0 = 0;
|
||||||
if (results[Leaf1ECX] & (1u << 27)) {
|
if (results[Leaf01ECX] & (1u << 27)) {
|
||||||
// XGETBV enabled
|
// XGETBV enabled
|
||||||
uint xgetbvA = 0, xgetbvD = 0;
|
uint xgetbvA = 0, xgetbvD = 0;
|
||||||
xgetbv(0, xgetbvA, xgetbvD);
|
xgetbv(0, xgetbvA, xgetbvD);
|
||||||
|
|
||||||
xcr0 = xgetbvA;
|
xcr0 = xgetbvA;
|
||||||
if (sizeof(XCR0Flags) > sizeof(xgetbvA))
|
if (sizeof(XSaveBits) > sizeof(xgetbvA))
|
||||||
xcr0 |= quint64(xgetbvD) << 32;
|
xcr0 |= quint64(xgetbvD) << 32;
|
||||||
xcr0 = adjustedXcr0(xcr0);
|
xcr0 = adjustedXcr0(xcr0);
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((xcr0 & AVXState) != AVXState) {
|
for (auto req : xsave_requirements) {
|
||||||
// support for YMM registers is disabled, disable all AVX
|
if ((xcr0 & req.xsave_state) != req.xsave_state)
|
||||||
features &= ~AllAVX;
|
features &= ~req.cpu_features;
|
||||||
} else if ((xcr0 & AVX512State) != AVX512State) {
|
|
||||||
// support for ZMM registers or mask registers is disabled, disable all AVX512
|
|
||||||
features &= ~AllAVX512;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (features & CpuFeatureRDRND && !checkRdrndWorks())
|
if (features & CpuFeatureRDRND && !checkRdrndWorks())
|
||||||
|
@ -246,10 +246,12 @@ asm(
|
|||||||
# define __haswell__ 1
|
# define __haswell__ 1
|
||||||
# endif
|
# endif
|
||||||
|
|
||||||
|
QT_BEGIN_NAMESPACE
|
||||||
|
static const quint64 qCompilerCpuFeatures = _compilerCpuFeatures;
|
||||||
|
|
||||||
// This constant does not include all CPU features found in a Haswell, only
|
// This constant does not include all CPU features found in a Haswell, only
|
||||||
// those that we'd have optimized code for.
|
// those that we'd have optimized code for.
|
||||||
// Note: must use Q_CONSTEXPR here, as this file may be compiled in C mode.
|
// Note: must use Q_CONSTEXPR here, as this file may be compiled in C mode.
|
||||||
QT_BEGIN_NAMESPACE
|
|
||||||
static const quint64 CpuFeatureArchHaswell = 0
|
static const quint64 CpuFeatureArchHaswell = 0
|
||||||
| CpuFeatureSSE2
|
| CpuFeatureSSE2
|
||||||
| CpuFeatureSSE3
|
| CpuFeatureSSE3
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
/****************************************************************************
|
/****************************************************************************
|
||||||
**
|
**
|
||||||
** Copyright (C) 2018 Intel Corporation.
|
** Copyright (C) 2022 Intel Corporation.
|
||||||
** Contact: https://www.qt.io/licensing/
|
** Contact: https://www.qt.io/licensing/
|
||||||
**
|
**
|
||||||
** This file is part of the QtCore module of the Qt Toolkit.
|
** This file is part of the QtCore module of the Qt Toolkit.
|
||||||
@ -37,9 +37,7 @@
|
|||||||
**
|
**
|
||||||
****************************************************************************/
|
****************************************************************************/
|
||||||
|
|
||||||
// This is a generated file. DO NOT EDIT.
|
#include "qsimd_x86_p.h"
|
||||||
// Please see util/x86simdgen/generate.pl
|
|
||||||
#include "qsimd_p.h"
|
|
||||||
|
|
||||||
static const char features_string[] =
|
static const char features_string[] =
|
||||||
" sse2\0"
|
" sse2\0"
|
||||||
@ -55,101 +53,188 @@ static const char features_string[] =
|
|||||||
" f16c\0"
|
" f16c\0"
|
||||||
" rdrnd\0"
|
" rdrnd\0"
|
||||||
" bmi\0"
|
" bmi\0"
|
||||||
" hle\0"
|
|
||||||
" avx2\0"
|
" avx2\0"
|
||||||
" bmi2\0"
|
" bmi2\0"
|
||||||
" rtm\0"
|
|
||||||
" avx512f\0"
|
" avx512f\0"
|
||||||
" avx512dq\0"
|
" avx512dq\0"
|
||||||
" rdseed\0"
|
" rdseed\0"
|
||||||
" avx512ifma\0"
|
" avx512ifma\0"
|
||||||
" avx512pf\0"
|
|
||||||
" avx512er\0"
|
|
||||||
" avx512cd\0"
|
" avx512cd\0"
|
||||||
" sha\0"
|
" sha\0"
|
||||||
" avx512bw\0"
|
" avx512bw\0"
|
||||||
" avx512vl\0"
|
" avx512vl\0"
|
||||||
" avx512vbmi\0"
|
" avx512vbmi\0"
|
||||||
" avx512vbmi2\0"
|
" avx512vbmi2\0"
|
||||||
|
" shstk\0"
|
||||||
" gfni\0"
|
" gfni\0"
|
||||||
" vaes\0"
|
" vaes\0"
|
||||||
" avx512vnni\0"
|
" avx512vnni\0"
|
||||||
" avx512bitalg\0"
|
" avx512bitalg\0"
|
||||||
" avx512vpopcntdq\0"
|
" avx512vpopcntdq\0"
|
||||||
" avx5124nniw\0"
|
" hybrid\0"
|
||||||
" avx5124fmaps\0"
|
" ibt\0"
|
||||||
|
" avx512fp16\0"
|
||||||
"\0";
|
"\0";
|
||||||
|
|
||||||
static const quint16 features_indices[] = {
|
static const uint16_t features_indices[] = {
|
||||||
306, 0, 6, 12, 19, 24, 32, 40,
|
0, 6, 12, 19, 24, 32, 40, 47,
|
||||||
47, 55, 60, 65, 71, 78, 83, 88,
|
55, 60, 65, 71, 78, 83, 89, 95,
|
||||||
94, 100, 105, 114, 124, 132, 144, 154,
|
104, 114, 122, 134, 144, 149, 159, 169,
|
||||||
164, 174, 179, 189, 199, 211, 224, 230,
|
181, 194, 201, 207, 213, 225, 239, 256,
|
||||||
236, 248, 262, 279, 292
|
264, 269,
|
||||||
};
|
};
|
||||||
|
|
||||||
enum X86CpuidLeaves {
|
enum X86CpuidLeaves {
|
||||||
Leaf1ECX,
|
Leaf01EDX,
|
||||||
Leaf1EDX,
|
Leaf01ECX,
|
||||||
Leaf7_0EBX,
|
Leaf07_00EBX,
|
||||||
Leaf7_0ECX,
|
Leaf07_00ECX,
|
||||||
Leaf7_0EDX,
|
Leaf07_00EDX,
|
||||||
|
Leaf07_01EAX,
|
||||||
|
Leaf13_01EAX,
|
||||||
|
Leaf80000001hECX,
|
||||||
|
Leaf80000008hEBX,
|
||||||
X86CpuidMaxLeaf
|
X86CpuidMaxLeaf
|
||||||
};
|
};
|
||||||
|
|
||||||
static const quint8 x86_locators[] = {
|
static const uint16_t x86_locators[] = {
|
||||||
Leaf1EDX*32 + 26, // sse2
|
Leaf01EDX*32 + 26, // sse2
|
||||||
Leaf1ECX*32 + 0, // sse3
|
Leaf01ECX*32 + 0, // sse3
|
||||||
Leaf1ECX*32 + 9, // ssse3
|
Leaf01ECX*32 + 9, // ssse3
|
||||||
Leaf1ECX*32 + 12, // fma
|
Leaf01ECX*32 + 12, // fma
|
||||||
Leaf1ECX*32 + 19, // sse4.1
|
Leaf01ECX*32 + 19, // sse4.1
|
||||||
Leaf1ECX*32 + 20, // sse4.2
|
Leaf01ECX*32 + 20, // sse4.2
|
||||||
Leaf1ECX*32 + 22, // movbe
|
Leaf01ECX*32 + 22, // movbe
|
||||||
Leaf1ECX*32 + 23, // popcnt
|
Leaf01ECX*32 + 23, // popcnt
|
||||||
Leaf1ECX*32 + 25, // aes
|
Leaf01ECX*32 + 25, // aes
|
||||||
Leaf1ECX*32 + 28, // avx
|
Leaf01ECX*32 + 28, // avx
|
||||||
Leaf1ECX*32 + 29, // f16c
|
Leaf01ECX*32 + 29, // f16c
|
||||||
Leaf1ECX*32 + 30, // rdrnd
|
Leaf01ECX*32 + 30, // rdrnd
|
||||||
Leaf7_0EBX*32 + 3, // bmi
|
Leaf07_00EBX*32 + 3, // bmi
|
||||||
Leaf7_0EBX*32 + 4, // hle
|
Leaf07_00EBX*32 + 5, // avx2
|
||||||
Leaf7_0EBX*32 + 5, // avx2
|
Leaf07_00EBX*32 + 8, // bmi2
|
||||||
Leaf7_0EBX*32 + 8, // bmi2
|
Leaf07_00EBX*32 + 16, // avx512f
|
||||||
Leaf7_0EBX*32 + 11, // rtm
|
Leaf07_00EBX*32 + 17, // avx512dq
|
||||||
Leaf7_0EBX*32 + 16, // avx512f
|
Leaf07_00EBX*32 + 18, // rdseed
|
||||||
Leaf7_0EBX*32 + 17, // avx512dq
|
Leaf07_00EBX*32 + 21, // avx512ifma
|
||||||
Leaf7_0EBX*32 + 18, // rdseed
|
Leaf07_00EBX*32 + 28, // avx512cd
|
||||||
Leaf7_0EBX*32 + 21, // avx512ifma
|
Leaf07_00EBX*32 + 29, // sha
|
||||||
Leaf7_0EBX*32 + 26, // avx512pf
|
Leaf07_00EBX*32 + 30, // avx512bw
|
||||||
Leaf7_0EBX*32 + 27, // avx512er
|
Leaf07_00EBX*32 + 31, // avx512vl
|
||||||
Leaf7_0EBX*32 + 28, // avx512cd
|
Leaf07_00ECX*32 + 1, // avx512vbmi
|
||||||
Leaf7_0EBX*32 + 29, // sha
|
Leaf07_00ECX*32 + 6, // avx512vbmi2
|
||||||
Leaf7_0EBX*32 + 30, // avx512bw
|
Leaf07_00ECX*32 + 7, // shstk
|
||||||
Leaf7_0EBX*32 + 31, // avx512vl
|
Leaf07_00ECX*32 + 8, // gfni
|
||||||
Leaf7_0ECX*32 + 1, // avx512vbmi
|
Leaf07_00ECX*32 + 9, // vaes
|
||||||
Leaf7_0ECX*32 + 6, // avx512vbmi2
|
Leaf07_00ECX*32 + 11, // avx512vnni
|
||||||
Leaf7_0ECX*32 + 8, // gfni
|
Leaf07_00ECX*32 + 12, // avx512bitalg
|
||||||
Leaf7_0ECX*32 + 9, // vaes
|
Leaf07_00ECX*32 + 14, // avx512vpopcntdq
|
||||||
Leaf7_0ECX*32 + 11, // avx512vnni
|
Leaf07_00EDX*32 + 15, // hybrid
|
||||||
Leaf7_0ECX*32 + 12, // avx512bitalg
|
Leaf07_00EDX*32 + 20, // ibt
|
||||||
Leaf7_0ECX*32 + 14, // avx512vpopcntdq
|
Leaf07_00EDX*32 + 23, // avx512fp16
|
||||||
Leaf7_0EDX*32 + 2, // avx5124nniw
|
};
|
||||||
Leaf7_0EDX*32 + 3 // avx5124fmaps
|
|
||||||
|
struct X86Architecture
|
||||||
|
{
|
||||||
|
uint64_t features;
|
||||||
|
char name[17 + 1];
|
||||||
|
};
|
||||||
|
|
||||||
|
static const struct X86Architecture x86_architectures[] = {
|
||||||
|
{ cpu_sapphirerapids, "Sapphire Rapids" },
|
||||||
|
{ cpu_tigerlake, "Tiger Lake" },
|
||||||
|
{ cpu_icelake_server, "Ice Lake (Server)" },
|
||||||
|
{ cpu_icelake_client, "Ice Lake (Client)" },
|
||||||
|
{ cpu_alderlake, "Alder Lake" },
|
||||||
|
{ cpu_cooperlake, "Cooper Lake" },
|
||||||
|
{ cpu_cannonlake, "Cannon Lake" },
|
||||||
|
{ cpu_cascadelake, "Cascade Lake" },
|
||||||
|
{ cpu_skylake_avx512, "Skylake (Avx512)" },
|
||||||
|
{ cpu_skylake, "Skylake" },
|
||||||
|
{ cpu_tremont, "Tremont" },
|
||||||
|
{ cpu_broadwell, "Broadwell" },
|
||||||
|
{ cpu_haswell, "Haswell" },
|
||||||
|
{ cpu_goldmont, "Goldmont" },
|
||||||
|
{ cpu_ivybridge, "Ivy Bridge" },
|
||||||
|
{ cpu_silvermont, "Silvermont" },
|
||||||
|
{ cpu_sandybridge, "Sandy Bridge" },
|
||||||
|
{ cpu_westmere, "Westmere" },
|
||||||
|
{ cpu_core2, "Core2" },
|
||||||
|
};
|
||||||
|
|
||||||
|
enum XSaveBits {
|
||||||
|
XSave_X87 = 0x0001, // X87 and MMX state
|
||||||
|
XSave_SseState = 0x0002, // SSE: 128 bits of XMM registers
|
||||||
|
XSave_Ymm_Hi128 = 0x0004, // AVX: high 128 bits in YMM registers
|
||||||
|
XSave_Bndregs = 0x0008, // Memory Protection Extensions
|
||||||
|
XSave_Bndcsr = 0x0010, // Memory Protection Extensions
|
||||||
|
XSave_OpMask = 0x0020, // AVX512: k0 through k7
|
||||||
|
XSave_Zmm_Hi256 = 0x0040, // AVX512: high 256 bits of ZMM0-15
|
||||||
|
XSave_Hi16_Zmm = 0x0080, // AVX512: all 512 bits of ZMM16-31
|
||||||
|
XSave_PTState = 0x0100, // Processor Trace
|
||||||
|
XSave_PKRUState = 0x0200, // Protection Key
|
||||||
|
XSave_CetUState = 0x0800, // CET: user mode
|
||||||
|
XSave_CetSState = 0x1000, // CET: supervisor mode
|
||||||
|
XSave_HdcState = 0x2000, // Hardware Duty Cycle
|
||||||
|
XSave_UintrState = 0x4000, // User Interrupts
|
||||||
|
XSave_HwpState = 0x10000, // Hardware P-State
|
||||||
|
XSave_Xtilecfg = 0x20000, // AMX: XTILECFG register
|
||||||
|
XSave_Xtiledata = 0x40000, // AMX: data in the tiles
|
||||||
|
XSave_AvxState = XSave_SseState | XSave_Ymm_Hi128,
|
||||||
|
XSave_MPXState = XSave_Bndregs | XSave_Bndcsr,
|
||||||
|
XSave_Avx512State = XSave_AvxState | XSave_OpMask | XSave_Zmm_Hi256 | XSave_Hi16_Zmm,
|
||||||
|
XSave_CetState = XSave_CetUState | XSave_CetSState,
|
||||||
|
XSave_AmxState = XSave_Xtilecfg | XSave_Xtiledata,
|
||||||
|
};
|
||||||
|
|
||||||
|
// List of features requiring XSave_AvxState
|
||||||
|
static const uint64_t XSaveReq_AvxState = 0
|
||||||
|
| cpu_feature_fma
|
||||||
|
| cpu_feature_avx
|
||||||
|
| cpu_feature_f16c
|
||||||
|
| cpu_feature_avx2
|
||||||
|
| cpu_feature_avx512f
|
||||||
|
| cpu_feature_avx512dq
|
||||||
|
| cpu_feature_avx512ifma
|
||||||
|
| cpu_feature_avx512cd
|
||||||
|
| cpu_feature_avx512bw
|
||||||
|
| cpu_feature_avx512vl
|
||||||
|
| cpu_feature_avx512vbmi
|
||||||
|
| cpu_feature_avx512vbmi2
|
||||||
|
| cpu_feature_vaes
|
||||||
|
| cpu_feature_avx512vnni
|
||||||
|
| cpu_feature_avx512bitalg
|
||||||
|
| cpu_feature_avx512vpopcntdq
|
||||||
|
| cpu_feature_avx512fp16;
|
||||||
|
|
||||||
|
// List of features requiring XSave_Avx512State
|
||||||
|
static const uint64_t XSaveReq_Avx512State = 0
|
||||||
|
| cpu_feature_avx512f
|
||||||
|
| cpu_feature_avx512dq
|
||||||
|
| cpu_feature_avx512ifma
|
||||||
|
| cpu_feature_avx512cd
|
||||||
|
| cpu_feature_avx512bw
|
||||||
|
| cpu_feature_avx512vl
|
||||||
|
| cpu_feature_avx512vbmi
|
||||||
|
| cpu_feature_avx512vbmi2
|
||||||
|
| cpu_feature_avx512vnni
|
||||||
|
| cpu_feature_avx512bitalg
|
||||||
|
| cpu_feature_avx512vpopcntdq
|
||||||
|
| cpu_feature_avx512fp16;
|
||||||
|
|
||||||
|
// List of features requiring XSave_CetState
|
||||||
|
static const uint64_t XSaveReq_CetState = 0
|
||||||
|
| cpu_feature_shstk;
|
||||||
|
|
||||||
|
struct XSaveRequirementMapping
|
||||||
|
{
|
||||||
|
uint64_t cpu_features;
|
||||||
|
uint64_t xsave_state;
|
||||||
|
};
|
||||||
|
|
||||||
|
static const struct XSaveRequirementMapping xsave_requirements[] = {
|
||||||
|
{ XSaveReq_AvxState, XSave_AvxState },
|
||||||
|
{ XSaveReq_Avx512State, XSave_Avx512State },
|
||||||
|
{ XSaveReq_CetState, XSave_CetState },
|
||||||
};
|
};
|
||||||
|
|
||||||
// List of AVX512 features (see detectProcessorFeatures())
|
|
||||||
static const quint64 AllAVX512 = 0
|
|
||||||
| CpuFeatureAVX512F
|
|
||||||
| CpuFeatureAVX512DQ
|
|
||||||
| CpuFeatureAVX512IFMA
|
|
||||||
| CpuFeatureAVX512PF
|
|
||||||
| CpuFeatureAVX512ER
|
|
||||||
| CpuFeatureAVX512CD
|
|
||||||
| CpuFeatureAVX512BW
|
|
||||||
| CpuFeatureAVX512VL
|
|
||||||
| CpuFeatureAVX512VBMI
|
|
||||||
| CpuFeatureAVX512VBMI2
|
|
||||||
| CpuFeatureAVX512VNNI
|
|
||||||
| CpuFeatureAVX512BITALG
|
|
||||||
| CpuFeatureAVX512VPOPCNTDQ
|
|
||||||
| CpuFeatureAVX5124NNIW
|
|
||||||
| CpuFeatureAVX5124FMAPS;
|
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
/****************************************************************************
|
/****************************************************************************
|
||||||
**
|
**
|
||||||
** Copyright (C) 2018 Intel Corporation.
|
** Copyright (C) 2022 Intel Corporation.
|
||||||
** Contact: https://www.qt.io/licensing/
|
** Contact: https://www.qt.io/licensing/
|
||||||
**
|
**
|
||||||
** This file is part of the QtCore module of the Qt Toolkit.
|
** This file is part of the QtCore module of the Qt Toolkit.
|
||||||
@ -38,224 +38,358 @@
|
|||||||
****************************************************************************/
|
****************************************************************************/
|
||||||
|
|
||||||
// This is a generated file. DO NOT EDIT.
|
// This is a generated file. DO NOT EDIT.
|
||||||
// Please see util/x86simdgen/generate.pl
|
// Please see 3rdparty/x86simd_generate.pl
|
||||||
#ifndef QSIMD_P_H
|
|
||||||
# error "Please include <private/qsimd_p.h> instead"
|
|
||||||
#endif
|
|
||||||
#ifndef QSIMD_X86_P_H
|
#ifndef QSIMD_X86_P_H
|
||||||
#define QSIMD_X86_P_H
|
#define QSIMD_X86_P_H
|
||||||
|
|
||||||
#include "qsimd_p.h"
|
#include <stdint.h>
|
||||||
|
|
||||||
//
|
|
||||||
// W A R N I N G
|
|
||||||
// -------------
|
|
||||||
//
|
|
||||||
// This file is not part of the Qt API. It exists purely as an
|
|
||||||
// implementation detail. This header file may change from version to
|
|
||||||
// version without notice, or even be removed.
|
|
||||||
//
|
|
||||||
// We mean it.
|
|
||||||
//
|
|
||||||
|
|
||||||
QT_BEGIN_NAMESPACE
|
|
||||||
|
|
||||||
// used only to indicate that the CPU detection was initialized
|
|
||||||
#define QSimdInitialized (Q_UINT64_C(1) << 0)
|
|
||||||
|
|
||||||
// in CPUID Leaf 1, EDX:
|
// in CPUID Leaf 1, EDX:
|
||||||
#define CpuFeatureSSE2 (Q_UINT64_C(1) << 1)
|
#define cpu_feature_sse2 (UINT64_C(1) << 0)
|
||||||
#define QT_FUNCTION_TARGET_STRING_SSE2 "sse2"
|
#define QT_FUNCTION_TARGET_STRING_SSE2 "sse2"
|
||||||
|
|
||||||
// in CPUID Leaf 1, ECX:
|
// in CPUID Leaf 1, ECX:
|
||||||
#define CpuFeatureSSE3 (Q_UINT64_C(1) << 2)
|
#define cpu_feature_sse3 (UINT64_C(1) << 1)
|
||||||
#define QT_FUNCTION_TARGET_STRING_SSE3 "sse3"
|
#define QT_FUNCTION_TARGET_STRING_SSE3 "sse3"
|
||||||
#define CpuFeatureSSSE3 (Q_UINT64_C(1) << 3)
|
#define cpu_feature_ssse3 (UINT64_C(1) << 2)
|
||||||
#define QT_FUNCTION_TARGET_STRING_SSSE3 "ssse3"
|
#define QT_FUNCTION_TARGET_STRING_SSSE3 "ssse3"
|
||||||
#define CpuFeatureFMA (Q_UINT64_C(1) << 4)
|
#define cpu_feature_fma (UINT64_C(1) << 3)
|
||||||
#define QT_FUNCTION_TARGET_STRING_FMA "fma"
|
#define QT_FUNCTION_TARGET_STRING_FMA "fma"
|
||||||
#define CpuFeatureSSE4_1 (Q_UINT64_C(1) << 5)
|
#define cpu_feature_sse4_1 (UINT64_C(1) << 4)
|
||||||
#define QT_FUNCTION_TARGET_STRING_SSE4_1 "sse4.1"
|
#define QT_FUNCTION_TARGET_STRING_SSE4_1 "sse4.1"
|
||||||
#define CpuFeatureSSE4_2 (Q_UINT64_C(1) << 6)
|
#define cpu_feature_sse4_2 (UINT64_C(1) << 5)
|
||||||
#define QT_FUNCTION_TARGET_STRING_SSE4_2 "sse4.2"
|
#define QT_FUNCTION_TARGET_STRING_SSE4_2 "sse4.2"
|
||||||
#define CpuFeatureMOVBE (Q_UINT64_C(1) << 7)
|
#define cpu_feature_movbe (UINT64_C(1) << 6)
|
||||||
#define QT_FUNCTION_TARGET_STRING_MOVBE "movbe"
|
#define QT_FUNCTION_TARGET_STRING_MOVBE "movbe"
|
||||||
#define CpuFeaturePOPCNT (Q_UINT64_C(1) << 8)
|
#define cpu_feature_popcnt (UINT64_C(1) << 7)
|
||||||
#define QT_FUNCTION_TARGET_STRING_POPCNT "popcnt"
|
#define QT_FUNCTION_TARGET_STRING_POPCNT "popcnt"
|
||||||
#define CpuFeatureAES (Q_UINT64_C(1) << 9)
|
#define cpu_feature_aes (UINT64_C(1) << 8)
|
||||||
#define QT_FUNCTION_TARGET_STRING_AES "aes,sse4.2"
|
#define QT_FUNCTION_TARGET_STRING_AES "aes,sse4.2"
|
||||||
#define CpuFeatureAVX (Q_UINT64_C(1) << 10)
|
#define cpu_feature_avx (UINT64_C(1) << 9)
|
||||||
#define QT_FUNCTION_TARGET_STRING_AVX "avx"
|
#define QT_FUNCTION_TARGET_STRING_AVX "avx"
|
||||||
#define CpuFeatureF16C (Q_UINT64_C(1) << 11)
|
#define cpu_feature_f16c (UINT64_C(1) << 10)
|
||||||
#define QT_FUNCTION_TARGET_STRING_F16C "f16c"
|
#define QT_FUNCTION_TARGET_STRING_F16C "f16c,avx"
|
||||||
#define CpuFeatureRDRND (Q_UINT64_C(1) << 12)
|
#define cpu_feature_rdrnd (UINT64_C(1) << 11)
|
||||||
#define QT_FUNCTION_TARGET_STRING_RDRND "rdrnd"
|
#define QT_FUNCTION_TARGET_STRING_RDRND "rdrnd"
|
||||||
|
|
||||||
// in CPUID Leaf 7, Sub-leaf 0, EBX:
|
// in CPUID Leaf 7, Sub-leaf 0, EBX:
|
||||||
#define CpuFeatureBMI (Q_UINT64_C(1) << 13)
|
#define cpu_feature_bmi (UINT64_C(1) << 12)
|
||||||
#define QT_FUNCTION_TARGET_STRING_BMI "bmi"
|
#define QT_FUNCTION_TARGET_STRING_BMI "bmi"
|
||||||
#define CpuFeatureHLE (Q_UINT64_C(1) << 14)
|
#define cpu_feature_avx2 (UINT64_C(1) << 13)
|
||||||
#define QT_FUNCTION_TARGET_STRING_HLE "hle"
|
#define QT_FUNCTION_TARGET_STRING_AVX2 "avx2,avx"
|
||||||
#define CpuFeatureAVX2 (Q_UINT64_C(1) << 15)
|
#define cpu_feature_bmi2 (UINT64_C(1) << 14)
|
||||||
#define QT_FUNCTION_TARGET_STRING_AVX2 "avx2"
|
|
||||||
#define CpuFeatureBMI2 (Q_UINT64_C(1) << 16)
|
|
||||||
#define QT_FUNCTION_TARGET_STRING_BMI2 "bmi2"
|
#define QT_FUNCTION_TARGET_STRING_BMI2 "bmi2"
|
||||||
#define CpuFeatureRTM (Q_UINT64_C(1) << 17)
|
#define cpu_feature_avx512f (UINT64_C(1) << 15)
|
||||||
#define QT_FUNCTION_TARGET_STRING_RTM "rtm"
|
#define QT_FUNCTION_TARGET_STRING_AVX512F "avx512f,avx"
|
||||||
#define CpuFeatureAVX512F (Q_UINT64_C(1) << 18)
|
#define cpu_feature_avx512dq (UINT64_C(1) << 16)
|
||||||
#define QT_FUNCTION_TARGET_STRING_AVX512F "avx512f"
|
#define QT_FUNCTION_TARGET_STRING_AVX512DQ "avx512dq,avx512f"
|
||||||
#define CpuFeatureAVX512DQ (Q_UINT64_C(1) << 19)
|
#define cpu_feature_rdseed (UINT64_C(1) << 17)
|
||||||
#define QT_FUNCTION_TARGET_STRING_AVX512DQ "avx512dq"
|
|
||||||
#define CpuFeatureRDSEED (Q_UINT64_C(1) << 20)
|
|
||||||
#define QT_FUNCTION_TARGET_STRING_RDSEED "rdseed"
|
#define QT_FUNCTION_TARGET_STRING_RDSEED "rdseed"
|
||||||
#define CpuFeatureAVX512IFMA (Q_UINT64_C(1) << 21)
|
#define cpu_feature_avx512ifma (UINT64_C(1) << 18)
|
||||||
#define QT_FUNCTION_TARGET_STRING_AVX512IFMA "avx512ifma"
|
#define QT_FUNCTION_TARGET_STRING_AVX512IFMA "avx512ifma,avx512f"
|
||||||
#define CpuFeatureAVX512PF (Q_UINT64_C(1) << 22)
|
#define cpu_feature_avx512cd (UINT64_C(1) << 19)
|
||||||
#define QT_FUNCTION_TARGET_STRING_AVX512PF "avx512pf"
|
#define QT_FUNCTION_TARGET_STRING_AVX512CD "avx512cd,avx512f"
|
||||||
#define CpuFeatureAVX512ER (Q_UINT64_C(1) << 23)
|
#define cpu_feature_sha (UINT64_C(1) << 20)
|
||||||
#define QT_FUNCTION_TARGET_STRING_AVX512ER "avx512er"
|
|
||||||
#define CpuFeatureAVX512CD (Q_UINT64_C(1) << 24)
|
|
||||||
#define QT_FUNCTION_TARGET_STRING_AVX512CD "avx512cd"
|
|
||||||
#define CpuFeatureSHA (Q_UINT64_C(1) << 25)
|
|
||||||
#define QT_FUNCTION_TARGET_STRING_SHA "sha"
|
#define QT_FUNCTION_TARGET_STRING_SHA "sha"
|
||||||
#define CpuFeatureAVX512BW (Q_UINT64_C(1) << 26)
|
#define cpu_feature_avx512bw (UINT64_C(1) << 21)
|
||||||
#define QT_FUNCTION_TARGET_STRING_AVX512BW "avx512bw"
|
#define QT_FUNCTION_TARGET_STRING_AVX512BW "avx512bw,avx512f"
|
||||||
#define CpuFeatureAVX512VL (Q_UINT64_C(1) << 27)
|
#define cpu_feature_avx512vl (UINT64_C(1) << 22)
|
||||||
#define QT_FUNCTION_TARGET_STRING_AVX512VL "avx512vl"
|
#define QT_FUNCTION_TARGET_STRING_AVX512VL "avx512vl,avx512f"
|
||||||
|
|
||||||
// in CPUID Leaf 7, Sub-leaf 0, ECX:
|
// in CPUID Leaf 7, Sub-leaf 0, ECX:
|
||||||
#define CpuFeatureAVX512VBMI (Q_UINT64_C(1) << 28)
|
#define cpu_feature_avx512vbmi (UINT64_C(1) << 23)
|
||||||
#define QT_FUNCTION_TARGET_STRING_AVX512VBMI "avx512vbmi"
|
#define QT_FUNCTION_TARGET_STRING_AVX512VBMI "avx512vbmi,avx512f"
|
||||||
#define CpuFeatureAVX512VBMI2 (Q_UINT64_C(1) << 29)
|
#define cpu_feature_avx512vbmi2 (UINT64_C(1) << 24)
|
||||||
#define QT_FUNCTION_TARGET_STRING_AVX512VBMI2 "avx512vbmi2"
|
#define QT_FUNCTION_TARGET_STRING_AVX512VBMI2 "avx512vbmi2,avx512f"
|
||||||
#define CpuFeatureGFNI (Q_UINT64_C(1) << 30)
|
#define cpu_feature_shstk (UINT64_C(1) << 25)
|
||||||
|
#define QT_FUNCTION_TARGET_STRING_SHSTK "shstk"
|
||||||
|
#define cpu_feature_gfni (UINT64_C(1) << 26)
|
||||||
#define QT_FUNCTION_TARGET_STRING_GFNI "gfni"
|
#define QT_FUNCTION_TARGET_STRING_GFNI "gfni"
|
||||||
#define CpuFeatureVAES (Q_UINT64_C(1) << 31)
|
#define cpu_feature_vaes (UINT64_C(1) << 27)
|
||||||
#define QT_FUNCTION_TARGET_STRING_VAES "vaes"
|
#define QT_FUNCTION_TARGET_STRING_VAES "vaes,avx2,avx,aes"
|
||||||
#define CpuFeatureAVX512VNNI (Q_UINT64_C(1) << 32)
|
#define cpu_feature_avx512vnni (UINT64_C(1) << 28)
|
||||||
#define QT_FUNCTION_TARGET_STRING_AVX512VNNI "avx512vnni"
|
#define QT_FUNCTION_TARGET_STRING_AVX512VNNI "avx512vnni,avx512f"
|
||||||
#define CpuFeatureAVX512BITALG (Q_UINT64_C(1) << 33)
|
#define cpu_feature_avx512bitalg (UINT64_C(1) << 29)
|
||||||
#define QT_FUNCTION_TARGET_STRING_AVX512BITALG "avx512bitalg"
|
#define QT_FUNCTION_TARGET_STRING_AVX512BITALG "avx512bitalg,avx512f"
|
||||||
#define CpuFeatureAVX512VPOPCNTDQ (Q_UINT64_C(1) << 34)
|
#define cpu_feature_avx512vpopcntdq (UINT64_C(1) << 30)
|
||||||
#define QT_FUNCTION_TARGET_STRING_AVX512VPOPCNTDQ "avx512vpopcntdq"
|
#define QT_FUNCTION_TARGET_STRING_AVX512VPOPCNTDQ "avx512vpopcntdq,avx512f"
|
||||||
|
|
||||||
// in CPUID Leaf 7, Sub-leaf 0, EDX:
|
// in CPUID Leaf 7, Sub-leaf 0, EDX:
|
||||||
#define CpuFeatureAVX5124NNIW (Q_UINT64_C(1) << 35)
|
#define cpu_feature_hybrid (UINT64_C(1) << 31)
|
||||||
#define QT_FUNCTION_TARGET_STRING_AVX5124NNIW "avx5124nniw"
|
#define QT_FUNCTION_TARGET_STRING_HYBRID "hybrid"
|
||||||
#define CpuFeatureAVX5124FMAPS (Q_UINT64_C(1) << 36)
|
#define cpu_feature_ibt (UINT64_C(1) << 32)
|
||||||
#define QT_FUNCTION_TARGET_STRING_AVX5124FMAPS "avx5124fmaps"
|
#define QT_FUNCTION_TARGET_STRING_IBT "ibt"
|
||||||
|
#define cpu_feature_avx512fp16 (UINT64_C(1) << 33)
|
||||||
|
#define QT_FUNCTION_TARGET_STRING_AVX512FP16 "avx512fp16,avx512f,f16c"
|
||||||
|
|
||||||
static const quint64 qCompilerCpuFeatures = 0
|
// CPU architectures
|
||||||
|
#define cpu_x86_64 (0 \
|
||||||
|
| cpu_feature_sse2)
|
||||||
|
#define cpu_core2 (cpu_x86_64 \
|
||||||
|
| cpu_feature_sse3 \
|
||||||
|
| cpu_feature_ssse3)
|
||||||
|
#define cpu_nhm (cpu_core2 \
|
||||||
|
| cpu_feature_sse4_1 \
|
||||||
|
| cpu_feature_sse4_2 \
|
||||||
|
| cpu_feature_popcnt)
|
||||||
|
#define cpu_wsm (cpu_nhm)
|
||||||
|
#define cpu_snb (cpu_wsm \
|
||||||
|
| cpu_feature_avx)
|
||||||
|
#define cpu_ivb (cpu_snb \
|
||||||
|
| cpu_feature_f16c \
|
||||||
|
| cpu_feature_rdrnd)
|
||||||
|
#define cpu_hsw (cpu_ivb \
|
||||||
|
| cpu_feature_avx2 \
|
||||||
|
| cpu_feature_fma \
|
||||||
|
| cpu_feature_bmi \
|
||||||
|
| cpu_feature_bmi2 \
|
||||||
|
| cpu_feature_movbe)
|
||||||
|
#define cpu_bdw (cpu_hsw \
|
||||||
|
| cpu_feature_rdseed)
|
||||||
|
#define cpu_bdx (cpu_bdw)
|
||||||
|
#define cpu_skl (cpu_bdw)
|
||||||
|
#define cpu_adl (cpu_skl \
|
||||||
|
| cpu_feature_gfni \
|
||||||
|
| cpu_feature_vaes \
|
||||||
|
| cpu_feature_shstk \
|
||||||
|
| cpu_feature_ibt)
|
||||||
|
#define cpu_skx (cpu_skl \
|
||||||
|
| cpu_feature_avx512f \
|
||||||
|
| cpu_feature_avx512dq \
|
||||||
|
| cpu_feature_avx512cd \
|
||||||
|
| cpu_feature_avx512bw \
|
||||||
|
| cpu_feature_avx512vl)
|
||||||
|
#define cpu_clx (cpu_skx \
|
||||||
|
| cpu_feature_avx512vnni)
|
||||||
|
#define cpu_cpx (cpu_clx)
|
||||||
|
#define cpu_cnl (cpu_skx \
|
||||||
|
| cpu_feature_avx512ifma \
|
||||||
|
| cpu_feature_avx512vbmi)
|
||||||
|
#define cpu_icl (cpu_cnl \
|
||||||
|
| cpu_feature_avx512vbmi2 \
|
||||||
|
| cpu_feature_gfni \
|
||||||
|
| cpu_feature_vaes \
|
||||||
|
| cpu_feature_avx512vnni \
|
||||||
|
| cpu_feature_avx512bitalg \
|
||||||
|
| cpu_feature_avx512vpopcntdq)
|
||||||
|
#define cpu_icx (cpu_icl)
|
||||||
|
#define cpu_tgl (cpu_icl \
|
||||||
|
| cpu_feature_shstk \
|
||||||
|
| cpu_feature_ibt)
|
||||||
|
#define cpu_spr (cpu_tgl)
|
||||||
|
#define cpu_slm (cpu_wsm \
|
||||||
|
| cpu_feature_rdrnd \
|
||||||
|
| cpu_feature_movbe)
|
||||||
|
#define cpu_glm (cpu_slm \
|
||||||
|
| cpu_feature_rdseed)
|
||||||
|
#define cpu_tnt (cpu_glm \
|
||||||
|
| cpu_feature_gfni)
|
||||||
|
#define cpu_nehalem (cpu_nhm)
|
||||||
|
#define cpu_westmere (cpu_wsm)
|
||||||
|
#define cpu_sandybridge (cpu_snb)
|
||||||
|
#define cpu_ivybridge (cpu_ivb)
|
||||||
|
#define cpu_haswell (cpu_hsw)
|
||||||
|
#define cpu_broadwell (cpu_bdw)
|
||||||
|
#define cpu_skylake (cpu_skl)
|
||||||
|
#define cpu_skylake_avx512 (cpu_skx)
|
||||||
|
#define cpu_cascadelake (cpu_clx)
|
||||||
|
#define cpu_cooperlake (cpu_cpx)
|
||||||
|
#define cpu_cannonlake (cpu_cnl)
|
||||||
|
#define cpu_icelake_client (cpu_icl)
|
||||||
|
#define cpu_icelake_server (cpu_icx)
|
||||||
|
#define cpu_alderlake (cpu_adl)
|
||||||
|
#define cpu_sapphirerapids (cpu_spr)
|
||||||
|
#define cpu_tigerlake (cpu_tgl)
|
||||||
|
#define cpu_silvermont (cpu_slm)
|
||||||
|
#define cpu_goldmont (cpu_glm)
|
||||||
|
#define cpu_tremont (cpu_tnt)
|
||||||
|
|
||||||
|
static const uint64_t _compilerCpuFeatures = 0
|
||||||
#ifdef __SSE2__
|
#ifdef __SSE2__
|
||||||
| CpuFeatureSSE2
|
| cpu_feature_sse2
|
||||||
#endif
|
#endif
|
||||||
#ifdef __SSE3__
|
#ifdef __SSE3__
|
||||||
| CpuFeatureSSE3
|
| cpu_feature_sse3
|
||||||
#endif
|
#endif
|
||||||
#ifdef __SSSE3__
|
#ifdef __SSSE3__
|
||||||
| CpuFeatureSSSE3
|
| cpu_feature_ssse3
|
||||||
#endif
|
#endif
|
||||||
#ifdef __FMA__
|
#ifdef __FMA__
|
||||||
| CpuFeatureFMA
|
| cpu_feature_fma
|
||||||
#endif
|
#endif
|
||||||
#ifdef __SSE4_1__
|
#ifdef __SSE4_1__
|
||||||
| CpuFeatureSSE4_1
|
| cpu_feature_sse4_1
|
||||||
#endif
|
#endif
|
||||||
#ifdef __SSE4_2__
|
#ifdef __SSE4_2__
|
||||||
| CpuFeatureSSE4_2
|
| cpu_feature_sse4_2
|
||||||
#endif
|
#endif
|
||||||
#ifdef __MOVBE__
|
#ifdef __MOVBE__
|
||||||
| CpuFeatureMOVBE
|
| cpu_feature_movbe
|
||||||
#endif
|
#endif
|
||||||
#ifdef __POPCNT__
|
#ifdef __POPCNT__
|
||||||
| CpuFeaturePOPCNT
|
| cpu_feature_popcnt
|
||||||
#endif
|
#endif
|
||||||
#ifdef __AES__
|
#ifdef __AES__
|
||||||
| CpuFeatureAES
|
| cpu_feature_aes
|
||||||
#endif
|
#endif
|
||||||
#ifdef __AVX__
|
#ifdef __AVX__
|
||||||
| CpuFeatureAVX
|
| cpu_feature_avx
|
||||||
#endif
|
#endif
|
||||||
#ifdef __F16C__
|
#ifdef __F16C__
|
||||||
| CpuFeatureF16C
|
| cpu_feature_f16c
|
||||||
#endif
|
#endif
|
||||||
#ifdef __RDRND__
|
#ifdef __RDRND__
|
||||||
| CpuFeatureRDRND
|
| cpu_feature_rdrnd
|
||||||
#endif
|
#endif
|
||||||
#ifdef __BMI__
|
#ifdef __BMI__
|
||||||
| CpuFeatureBMI
|
| cpu_feature_bmi
|
||||||
#endif
|
|
||||||
#ifdef __HLE__
|
|
||||||
| CpuFeatureHLE
|
|
||||||
#endif
|
#endif
|
||||||
#ifdef __AVX2__
|
#ifdef __AVX2__
|
||||||
| CpuFeatureAVX2
|
| cpu_feature_avx2
|
||||||
#endif
|
#endif
|
||||||
#ifdef __BMI2__
|
#ifdef __BMI2__
|
||||||
| CpuFeatureBMI2
|
| cpu_feature_bmi2
|
||||||
#endif
|
|
||||||
#ifdef __RTM__
|
|
||||||
| CpuFeatureRTM
|
|
||||||
#endif
|
#endif
|
||||||
#ifdef __AVX512F__
|
#ifdef __AVX512F__
|
||||||
| CpuFeatureAVX512F
|
| cpu_feature_avx512f
|
||||||
#endif
|
#endif
|
||||||
#ifdef __AVX512DQ__
|
#ifdef __AVX512DQ__
|
||||||
| CpuFeatureAVX512DQ
|
| cpu_feature_avx512dq
|
||||||
#endif
|
#endif
|
||||||
#ifdef __RDSEED__
|
#ifdef __RDSEED__
|
||||||
| CpuFeatureRDSEED
|
| cpu_feature_rdseed
|
||||||
#endif
|
#endif
|
||||||
#ifdef __AVX512IFMA__
|
#ifdef __AVX512IFMA__
|
||||||
| CpuFeatureAVX512IFMA
|
| cpu_feature_avx512ifma
|
||||||
#endif
|
|
||||||
#ifdef __AVX512PF__
|
|
||||||
| CpuFeatureAVX512PF
|
|
||||||
#endif
|
|
||||||
#ifdef __AVX512ER__
|
|
||||||
| CpuFeatureAVX512ER
|
|
||||||
#endif
|
#endif
|
||||||
#ifdef __AVX512CD__
|
#ifdef __AVX512CD__
|
||||||
| CpuFeatureAVX512CD
|
| cpu_feature_avx512cd
|
||||||
#endif
|
#endif
|
||||||
#ifdef __SHA__
|
#ifdef __SHA__
|
||||||
| CpuFeatureSHA
|
| cpu_feature_sha
|
||||||
#endif
|
#endif
|
||||||
#ifdef __AVX512BW__
|
#ifdef __AVX512BW__
|
||||||
| CpuFeatureAVX512BW
|
| cpu_feature_avx512bw
|
||||||
#endif
|
#endif
|
||||||
#ifdef __AVX512VL__
|
#ifdef __AVX512VL__
|
||||||
| CpuFeatureAVX512VL
|
| cpu_feature_avx512vl
|
||||||
#endif
|
#endif
|
||||||
#ifdef __AVX512VBMI__
|
#ifdef __AVX512VBMI__
|
||||||
| CpuFeatureAVX512VBMI
|
| cpu_feature_avx512vbmi
|
||||||
#endif
|
#endif
|
||||||
#ifdef __AVX512VBMI2__
|
#ifdef __AVX512VBMI2__
|
||||||
| CpuFeatureAVX512VBMI2
|
| cpu_feature_avx512vbmi2
|
||||||
|
#endif
|
||||||
|
#ifdef __SHSTK__
|
||||||
|
| cpu_feature_shstk
|
||||||
#endif
|
#endif
|
||||||
#ifdef __GFNI__
|
#ifdef __GFNI__
|
||||||
| CpuFeatureGFNI
|
| cpu_feature_gfni
|
||||||
#endif
|
#endif
|
||||||
#ifdef __VAES__
|
#ifdef __VAES__
|
||||||
| CpuFeatureVAES
|
| cpu_feature_vaes
|
||||||
#endif
|
#endif
|
||||||
#ifdef __AVX512VNNI__
|
#ifdef __AVX512VNNI__
|
||||||
| CpuFeatureAVX512VNNI
|
| cpu_feature_avx512vnni
|
||||||
#endif
|
#endif
|
||||||
#ifdef __AVX512BITALG__
|
#ifdef __AVX512BITALG__
|
||||||
| CpuFeatureAVX512BITALG
|
| cpu_feature_avx512bitalg
|
||||||
#endif
|
#endif
|
||||||
#ifdef __AVX512VPOPCNTDQ__
|
#ifdef __AVX512VPOPCNTDQ__
|
||||||
| CpuFeatureAVX512VPOPCNTDQ
|
| cpu_feature_avx512vpopcntdq
|
||||||
#endif
|
#endif
|
||||||
#ifdef __AVX5124NNIW__
|
#ifdef __HYBRID__
|
||||||
| CpuFeatureAVX5124NNIW
|
| cpu_feature_hybrid
|
||||||
#endif
|
#endif
|
||||||
#ifdef __AVX5124FMAPS__
|
#ifdef __IBT__
|
||||||
| CpuFeatureAVX5124FMAPS
|
| cpu_feature_ibt
|
||||||
|
#endif
|
||||||
|
#ifdef __AVX512FP16__
|
||||||
|
| cpu_feature_avx512fp16
|
||||||
#endif
|
#endif
|
||||||
;
|
;
|
||||||
|
|
||||||
QT_END_NAMESPACE
|
#if (defined __cplusplus) && __cplusplus >= 201103L
|
||||||
|
enum X86CpuFeatures : uint64_t {
|
||||||
|
CpuFeatureSSE2 = cpu_feature_sse2, ///< Streaming SIMD Extensions 2
|
||||||
|
CpuFeatureSSE3 = cpu_feature_sse3, ///< Streaming SIMD Extensions 3
|
||||||
|
CpuFeatureSSSE3 = cpu_feature_ssse3, ///< Supplemental Streaming SIMD Extensions 3
|
||||||
|
CpuFeatureFMA = cpu_feature_fma, ///< Fused Multiply-Add
|
||||||
|
CpuFeatureSSE4_1 = cpu_feature_sse4_1, ///< Streaming SIMD Extensions 4.1
|
||||||
|
CpuFeatureSSE4_2 = cpu_feature_sse4_2, ///< Streaming SIMD Extensions 4.2
|
||||||
|
CpuFeatureMOVBE = cpu_feature_movbe, ///< MOV Big Endian
|
||||||
|
CpuFeaturePOPCNT = cpu_feature_popcnt, ///< Population count
|
||||||
|
CpuFeatureAES = cpu_feature_aes, ///< Advanced Encryption Standard
|
||||||
|
CpuFeatureAVX = cpu_feature_avx, ///< Advanced Vector Extensions
|
||||||
|
CpuFeatureF16C = cpu_feature_f16c, ///< 16-bit Float Conversion
|
||||||
|
CpuFeatureRDRND = cpu_feature_rdrnd, ///< Random number generator
|
||||||
|
CpuFeatureBMI = cpu_feature_bmi, ///< Bit Manipulation Instructions
|
||||||
|
CpuFeatureAVX2 = cpu_feature_avx2, ///< Advanced Vector Extensions 2
|
||||||
|
CpuFeatureBMI2 = cpu_feature_bmi2, ///< Bit Manipulation Instructions 2
|
||||||
|
CpuFeatureAVX512F = cpu_feature_avx512f, ///< AVX512 Foundation
|
||||||
|
CpuFeatureAVX512DQ = cpu_feature_avx512dq, ///< AVX512 Double & Quadword
|
||||||
|
CpuFeatureRDSEED = cpu_feature_rdseed, ///< Random number generator for seeding
|
||||||
|
CpuFeatureAVX512IFMA = cpu_feature_avx512ifma, ///< AVX512 Integer Fused Multiply-Add
|
||||||
|
CpuFeatureAVX512CD = cpu_feature_avx512cd, ///< AVX512 Conflict Detection
|
||||||
|
CpuFeatureSHA = cpu_feature_sha, ///< SHA-1 and SHA-256 instructions
|
||||||
|
CpuFeatureAVX512BW = cpu_feature_avx512bw, ///< AVX512 Byte & Word
|
||||||
|
CpuFeatureAVX512VL = cpu_feature_avx512vl, ///< AVX512 Vector Length
|
||||||
|
CpuFeatureAVX512VBMI = cpu_feature_avx512vbmi, ///< AVX512 Vector Byte Manipulation Instructions
|
||||||
|
CpuFeatureAVX512VBMI2 = cpu_feature_avx512vbmi2, ///< AVX512 Vector Byte Manipulation Instructions 2
|
||||||
|
CpuFeatureSHSTK = cpu_feature_shstk, ///< Control Flow Enforcement Technology Shadow Stack
|
||||||
|
CpuFeatureGFNI = cpu_feature_gfni, ///< Galois Field new instructions
|
||||||
|
CpuFeatureVAES = cpu_feature_vaes, ///< 256- and 512-bit AES
|
||||||
|
CpuFeatureAVX512VNNI = cpu_feature_avx512vnni, ///< AVX512 Vector Neural Network Instructions
|
||||||
|
CpuFeatureAVX512BITALG = cpu_feature_avx512bitalg, ///< AVX512 Bit Algorithms
|
||||||
|
CpuFeatureAVX512VPOPCNTDQ = cpu_feature_avx512vpopcntdq, ///< AVX512 Population Count
|
||||||
|
CpuFeatureHYBRID = cpu_feature_hybrid, ///< Hybrid processor
|
||||||
|
CpuFeatureIBT = cpu_feature_ibt, ///< Control Flow Enforcement Technology Indirect Branch Tracking
|
||||||
|
CpuFeatureAVX512FP16 = cpu_feature_avx512fp16, ///< AVX512 16-bit Floating Point
|
||||||
|
}; // enum X86CpuFeatures
|
||||||
|
|
||||||
#endif // QSIMD_X86_P_H
|
enum X86CpuArchitectures : uint64_t {
|
||||||
|
CpuArchx8664 = cpu_x86_64,
|
||||||
|
CpuArchCore2 = cpu_core2,
|
||||||
|
CpuArchNHM = cpu_nhm,
|
||||||
|
CpuArchWSM = cpu_wsm,
|
||||||
|
CpuArchSNB = cpu_snb,
|
||||||
|
CpuArchIVB = cpu_ivb,
|
||||||
|
CpuArchHSW = cpu_hsw,
|
||||||
|
CpuArchBDW = cpu_bdw,
|
||||||
|
CpuArchBDX = cpu_bdx,
|
||||||
|
CpuArchSKL = cpu_skl,
|
||||||
|
CpuArchADL = cpu_adl,
|
||||||
|
CpuArchSKX = cpu_skx,
|
||||||
|
CpuArchCLX = cpu_clx,
|
||||||
|
CpuArchCPX = cpu_cpx,
|
||||||
|
CpuArchCNL = cpu_cnl,
|
||||||
|
CpuArchICL = cpu_icl,
|
||||||
|
CpuArchICX = cpu_icx,
|
||||||
|
CpuArchTGL = cpu_tgl,
|
||||||
|
CpuArchSPR = cpu_spr,
|
||||||
|
CpuArchSLM = cpu_slm,
|
||||||
|
CpuArchGLM = cpu_glm,
|
||||||
|
CpuArchTNT = cpu_tnt,
|
||||||
|
CpuArchNehalem = cpu_nehalem, ///< Intel Core i3/i5/i7
|
||||||
|
CpuArchWestmere = cpu_westmere, ///< Intel Core i3/i5/i7
|
||||||
|
CpuArchSandyBridge = cpu_sandybridge, ///< Second Generation Intel Core i3/i5/i7
|
||||||
|
CpuArchIvyBridge = cpu_ivybridge, ///< Third Generation Intel Core i3/i5/i7
|
||||||
|
CpuArchHaswell = cpu_haswell, ///< Fourth Generation Intel Core i3/i5/i7
|
||||||
|
CpuArchBroadwell = cpu_broadwell, ///< Fifth Generation Intel Core i3/i5/i7
|
||||||
|
CpuArchSkylake = cpu_skylake, ///< Sixth Generation Intel Core i3/i5/i7
|
||||||
|
CpuArchSkylakeAvx512 = cpu_skylake_avx512, ///< Intel Xeon Scalable
|
||||||
|
CpuArchCascadeLake = cpu_cascadelake, ///< Second Generation Intel Xeon Scalable
|
||||||
|
CpuArchCooperLake = cpu_cooperlake, ///< Third Generation Intel Xeon Scalable
|
||||||
|
CpuArchCannonLake = cpu_cannonlake, ///< Intel Core i3-8121U
|
||||||
|
CpuArchIceLakeClient = cpu_icelake_client, ///< Tenth Generation Intel Core i3/i5/i7
|
||||||
|
CpuArchIceLakeServer = cpu_icelake_server, ///< Third Generation Intel Xeon Scalable
|
||||||
|
CpuArchAlderLake = cpu_alderlake,
|
||||||
|
CpuArchSapphireRapids = cpu_sapphirerapids,
|
||||||
|
CpuArchTigerLake = cpu_tigerlake, ///< Eleventh Generation Intel Core i3/i5/i7
|
||||||
|
CpuArchSilvermont = cpu_silvermont,
|
||||||
|
CpuArchGoldmont = cpu_goldmont,
|
||||||
|
CpuArchTremont = cpu_tremont,
|
||||||
|
}; // enum X86cpuArchitectures
|
||||||
|
#endif /* C++11 */
|
||||||
|
|
||||||
|
#endif /* QSIMD_X86_P_H */
|
||||||
|
1
util/x86simdgen/.gitignore
vendored
Normal file
1
util/x86simdgen/.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
|||||||
|
qsimd_x86_p.h
|
35
util/x86simdgen/3rdparty/simd-amd.conf
vendored
Normal file
35
util/x86simdgen/3rdparty/simd-amd.conf
vendored
Normal file
@ -0,0 +1,35 @@
|
|||||||
|
# -*- mode: conf; indent-tabs-mode: t -*-
|
||||||
|
# Feature CPUID function Bit Required feature
|
||||||
|
#mmxext Leaf80000001hEDX 22 # AMD extensions to MMX
|
||||||
|
#rdtscp Leaf80000001hEDX 27 # RDTSCP instruction
|
||||||
|
#3dnow Leaf80000001hEDX 31 # 3DNow! instructions
|
||||||
|
#3dnowext Leaf80000001hEDX 30 # AMD extensions to 3DNow!
|
||||||
|
lzcnt Leaf80000001hECX 5 # Leading Zero Count
|
||||||
|
sse4a Leaf80000001hECX 6 # SSE4a
|
||||||
|
xop Leaf80000001hECX 11 # eXtended Operations
|
||||||
|
fma4 Leaf80000001hECX 16 # 4-operand Fused Multiply-Add
|
||||||
|
tbm Leaf80000001hECX 21 # Trailing Bit Manipulation
|
||||||
|
clzero Leaf80000008hEBX 0 # Cacheline clear and write zero
|
||||||
|
wbnoinvd Leaf80000008hEBX 9 # Write Back with No Invalidate
|
||||||
|
|
||||||
|
# Processor/arch listing below this line
|
||||||
|
# Source: GCC gcc/common/config/i386/i386-common.c
|
||||||
|
# Source: Wikipedia
|
||||||
|
# Architecture Based on New features
|
||||||
|
arch=AmdFam10h x86_64 sse3,sse4a,cx16,popcnt,lzcnt # AMD K10
|
||||||
|
arch=BtVer1 AmdFam10h xsave # AMD Bobcat v1
|
||||||
|
arch=BtVer2 BtVer1 ssse3,sse4.1,sse4.2,avx,bmi,f16c,movbe,xsaveopt # AMD Bobcat v2
|
||||||
|
arch=BdVer1 BtVer1 ssse3,sse4.1,sse4.2,avx,xop,fma4
|
||||||
|
arch=BdVer2 BdVer1 bmi,f16c,fma,tbm
|
||||||
|
arch=BdVer3 BdVer2 fsgsbase,xsaveopt
|
||||||
|
arch=BdVer4 BdVer3 avx2,bmi2,rdrnd,movbe
|
||||||
|
arch=ZnVer1 BdVer4 adx,rdseed,clzero,clflushopt,xsavec,xsaves
|
||||||
|
arch=ZnVer2 ZnVer1 clwb,wbnoinvd
|
||||||
|
|
||||||
|
arch=Barcelona AmdFam10h
|
||||||
|
arch=Bulldozer BdVer1 # AMD Bulldozer
|
||||||
|
arch=Piledriver BdVer2 # AMD Bulldozer v2 (Piledriver)
|
||||||
|
arch=Steamroller BdVer3 # AMD Bulldozer v3 (Steamroller)
|
||||||
|
arch=Excavator BdVer4 # AMD Bulldozer v4 (Excavator)
|
||||||
|
arch=Zen ZnVer1 # AMD Zen
|
||||||
|
arch=Zen2 ZnVer2 # AMD Zen2
|
179
util/x86simdgen/3rdparty/simd-intel.conf
vendored
Normal file
179
util/x86simdgen/3rdparty/simd-intel.conf
vendored
Normal file
@ -0,0 +1,179 @@
|
|||||||
|
# -*- mode: conf; indent-tabs-mode: t -*-
|
||||||
|
# Feature CPUID function Bit Required feature
|
||||||
|
#sep Leaf01EDX 11 # Sysenter/sysexit
|
||||||
|
#cmov Leaf01EDX 15 # Conditional Move
|
||||||
|
#clflush Leaf01EDX 19 # Cache-Line Flush
|
||||||
|
#mmx Leaf01EDX 23 # Multi Media Extensions
|
||||||
|
#fxsr Leaf01EDX 24 # FXSAVE instruction
|
||||||
|
#sse Leaf01EDX 25 # Streaming SIMD Extensions
|
||||||
|
sse2 Leaf01EDX 26 # Streaming SIMD Extensions 2
|
||||||
|
# -- everything above this line is mandatory on x86-64 --
|
||||||
|
sse3 Leaf01ECX 0 # Streaming SIMD Extensions 3
|
||||||
|
#pclmul Leaf01ECX 1 # Carryless Multiply
|
||||||
|
ssse3 Leaf01ECX 9 # Supplemental Streaming SIMD Extensions 3
|
||||||
|
fma Leaf01ECX 12 # Fused Multiply-Add
|
||||||
|
#cx16 Leaf01ECX 13 # Compare-Exchange 16 bytes
|
||||||
|
sse4.1 Leaf01ECX 19 # Streaming SIMD Extensions 4.1
|
||||||
|
sse4.2 Leaf01ECX 20 # Streaming SIMD Extensions 4.2
|
||||||
|
movbe Leaf01ECX 22 # MOV Big Endian
|
||||||
|
popcnt Leaf01ECX 23 # Population count
|
||||||
|
aes Leaf01ECX 25 sse4.2 # Advanced Encryption Standard
|
||||||
|
#xsave Leaf01ECX 26 # XSAVE, XGETBV instructions
|
||||||
|
#osxsave Leaf01ECX 27 # XSAVE enabled by OS
|
||||||
|
avx Leaf01ECX 28 # Advanced Vector Extensions
|
||||||
|
f16c Leaf01ECX 29 avx # 16-bit Float Conversion
|
||||||
|
rdrnd Leaf01ECX 30 # Random number generator
|
||||||
|
#hypervisor Leaf01ECX 31 # Running on a hypervisor
|
||||||
|
#fsgsbase Leaf07_00EBX 0 # FS/GS base access
|
||||||
|
bmi Leaf07_00EBX 3 # Bit Manipulation Instructions
|
||||||
|
#hle Leaf07_00EBX 4 # Hardware Lock Elision
|
||||||
|
avx2 Leaf07_00EBX 5 avx # Advanced Vector Extensions 2
|
||||||
|
bmi2 Leaf07_00EBX 8 # Bit Manipulation Instructions 2
|
||||||
|
#erms Leaf07_00EBX 9 # Enhanced REP MOVSB/STOSB
|
||||||
|
#rtm Leaf07_00EBX 11 # Restricted Transactional Memory
|
||||||
|
#rdt_m Leaf07_00EBX 12 # Resource Director Technology (RDT) Monitoring
|
||||||
|
#mpx Leaf07_00EBX 14 # Memory Protection Extensions
|
||||||
|
#rdt_a Leaf07_00EBX 15 # Resource Director Technology (RDT) Allocation
|
||||||
|
avx512f Leaf07_00EBX 16 avx # AVX512 Foundation
|
||||||
|
avx512dq Leaf07_00EBX 17 avx512f # AVX512 Double & Quadword
|
||||||
|
rdseed Leaf07_00EBX 18 # Random number generator for seeding
|
||||||
|
#adx Leaf07_00EBX 19 # Multi-Precision Add-Carry
|
||||||
|
avx512ifma Leaf07_00EBX 21 avx512f # AVX512 Integer Fused Multiply-Add
|
||||||
|
#clflushopt Leaf07_00EBX 23 # Cache-Line Flush Optimized
|
||||||
|
#clwb Leaf07_00EBX 24 # Cache-Line Write Back
|
||||||
|
#avx512pf Leaf07_00EBX 26 avx512f # AVX512 Prefetch
|
||||||
|
#avx512er Leaf07_00EBX 27 avx512f # AVX512 Exponential & Reciprocal
|
||||||
|
avx512cd Leaf07_00EBX 28 avx512f # AVX512 Conflict Detection
|
||||||
|
sha Leaf07_00EBX 29 # SHA-1 and SHA-256 instructions
|
||||||
|
avx512bw Leaf07_00EBX 30 avx512f # AVX512 Byte & Word
|
||||||
|
avx512vl Leaf07_00EBX 31 avx512f # AVX512 Vector Length
|
||||||
|
avx512vbmi Leaf07_00ECX 1 avx512f # AVX512 Vector Byte Manipulation Instructions
|
||||||
|
#pku Leaf07_00ECX 3 # Protection Keys for User mode
|
||||||
|
#ospke Leaf07_00ECX 4 # Protection Keys Enabled by OS
|
||||||
|
#waitpkg Leaf07_00ECX 5 # User-Level Monitor / Wait
|
||||||
|
avx512vbmi2 Leaf07_00ECX 6 avx512f # AVX512 Vector Byte Manipulation Instructions 2
|
||||||
|
shstk Leaf07_00ECX 7 # Control Flow Enforcement Technology Shadow Stack
|
||||||
|
gfni Leaf07_00ECX 8 # Galois Field new instructions
|
||||||
|
vaes Leaf07_00ECX 9 avx2,avx,aes # 256- and 512-bit AES
|
||||||
|
#vpclmulqdq Leaf07_00ECX 10 avx # 256- and 512-bit Carryless Multiply
|
||||||
|
avx512vnni Leaf07_00ECX 11 avx512f # AVX512 Vector Neural Network Instructions
|
||||||
|
avx512bitalg Leaf07_00ECX 12 avx512f # AVX512 Bit Algorithms
|
||||||
|
avx512vpopcntdq Leaf07_00ECX 14 avx512f # AVX512 Population Count
|
||||||
|
#la57 Leaf07_00ECX 16 # 5-level page tables
|
||||||
|
#rdpid Leaf07_00ECX 22 # RDPID instruction
|
||||||
|
#cldemote Leaf07_00ECX 25 # Cache Line Demotion
|
||||||
|
#movdiri Leaf07_00ECX 27 # Move Direct-store Integer
|
||||||
|
#movdir64b Leaf07_00ECX 28 # Move Direct-store 64 bytes
|
||||||
|
#enqcmd Leaf07_00ECX 29 # Enqueue Command
|
||||||
|
#pks Leaf07_00ECX 31 # Protection Keys for Supervisor mode
|
||||||
|
#avx5124nniw Leaf07_00EDX 2 avx512f # AVX512 4-iteration Vector Neural Network Instructions
|
||||||
|
#avx5124fmaps Leaf07_00EDX 3 avx512f # AVX512 4-iteration Fused Multiply Accumulation
|
||||||
|
#fsrm Leaf07_00EDX 4 # Fast Short REP MOV
|
||||||
|
#uintr Leaf07_00EDX 5 # User interrupts
|
||||||
|
#avx512vp2intersect Leaf07_00EDX 8 avx512f # AVX512 Intersection computation
|
||||||
|
#serialize Leaf07_00EDX 14 # SERIALIZE instruction
|
||||||
|
hybrid Leaf07_00EDX 15 # Hybrid processor
|
||||||
|
#tsxldtrk Leaf07_00EDX 16 # TSX (RTM) Suspend Load Address Tracking
|
||||||
|
#pconfig Leaf07_00EDX 18 # Platform configuration
|
||||||
|
ibt Leaf07_00EDX 20 # Control Flow Enforcement Technology Indirect Branch Tracking
|
||||||
|
#amxbf16 Leaf07_00EDX 22 amxtile # AMX Tile multiplication in BFloat16
|
||||||
|
avx512fp16 Leaf07_00EDX 23 avx512f,f16c # AVX512 16-bit Floating Point
|
||||||
|
#amxtile Leaf07_00EDX 24 # Advanced Matrix Extensions Tile support
|
||||||
|
#amxint8 Leaf07_00EDX 25 amxtile # AMX Tile multiplication for Int8
|
||||||
|
#avxvnni Leaf07_01EAX 4 avx # AVX (VEX-encoded) versions of the Vector Neural Network Instructions
|
||||||
|
#avx512bf16 Leaf07_01EAX 5 avx512f # AVX512 Brain Float16
|
||||||
|
#zlmovsb Leaf07_01EAX 10 # Zero-length MOVSB
|
||||||
|
#fsrs Leaf07_01EAX 11 # Fast Short (REP?) STOSB
|
||||||
|
#fsrc Leaf07_01EAX 12 # Fast Short (REP?) CMPSB, SCASB
|
||||||
|
#fred Leaf07_01EAX 17 # Flexible Return and Event Delivery
|
||||||
|
#lkgs Leaf07_01EAX 18 # Load into Kernel GS
|
||||||
|
#lam Leaf07_01EAX 26 # Linear Address Masking
|
||||||
|
#xsaveopt Leaf13_01EAX 0 # Optimized XSAVE
|
||||||
|
#xsavec Leaf13_01EAX 1 # XSAVE with Compaction
|
||||||
|
#xgetbv1 Leaf13_01EAX 2 # XGETBV with ECX=1
|
||||||
|
#xsaves Leaf13_01EAX 3 # XSAVE Supervisor mode
|
||||||
|
#xfd Leaf13_01EAX 4 # eXtended Feature Disable MSR
|
||||||
|
#lzcnt Leaf80000001hECX 5 # Leading Zero Count
|
||||||
|
|
||||||
|
# XSAVE states
|
||||||
|
# Source: Intel Software Development Manual, Volume 1, Chapter 13
|
||||||
|
# Source: Intel Instruction Set Extensions Manual (ed. 041), Chapter 3, "Intel AMX Instruction Set"
|
||||||
|
# Grouping Value Required for
|
||||||
|
xsave=X87 0x0001 # X87 and MMX state
|
||||||
|
xsave=SseState 0x0002 sse # SSE: 128 bits of XMM registers
|
||||||
|
xsave=Ymm_Hi128 0x0004 # AVX: high 128 bits in YMM registers
|
||||||
|
xsave=Bndregs 0x0008 # Memory Protection Extensions
|
||||||
|
xsave=Bndcsr 0x0010 # Memory Protection Extensions
|
||||||
|
xsave=OpMask 0x0020 # AVX512: k0 through k7
|
||||||
|
xsave=Zmm_Hi256 0x0040 # AVX512: high 256 bits of ZMM0-15
|
||||||
|
xsave=Hi16_Zmm 0x0080 # AVX512: all 512 bits of ZMM16-31
|
||||||
|
xsave=PTState 0x0100 # Processor Trace
|
||||||
|
xsave=PKRUState 0x0200 pku # Protection Key
|
||||||
|
# ??? 0x0400
|
||||||
|
xsave=CetUState 0x0800 # CET: user mode
|
||||||
|
xsave=CetSState 0x1000 # CET: supervisor mode
|
||||||
|
xsave=HdcState 0x2000 # Hardware Duty Cycle
|
||||||
|
xsave=UintrState 0x4000 uintr # User Interrupts
|
||||||
|
# ??? 0x8000
|
||||||
|
xsave=HwpState 0x10000 # Hardware P-State
|
||||||
|
xsave=Xtilecfg 0x20000 # AMX: XTILECFG register
|
||||||
|
xsave=Xtiledata 0x40000 # AMX: data in the tiles
|
||||||
|
xsave=AvxState SseState|Ymm_Hi128 avx,fma,avx512f
|
||||||
|
xsave=MPXState Bndregs|Bndcsr mpx
|
||||||
|
xsave=Avx512State AvxState|OpMask|Zmm_Hi256|Hi16_Zmm avx512f
|
||||||
|
xsave=CetState CetUState|CetSState shstk
|
||||||
|
xsave=AmxState Xtilecfg|Xtiledata amxtile
|
||||||
|
|
||||||
|
# Processor/arch listing below this line
|
||||||
|
# Source: Intel Instruction Set Extension manual, section 1.2
|
||||||
|
# Source: GCC gcc/config/i386/i386.h, i386-c.c, i386-builtins.c
|
||||||
|
# Architecture Based on New features Optional features
|
||||||
|
arch=x86_64 <> sse2
|
||||||
|
# Core line
|
||||||
|
arch=Core2 x86_64 sse3,ssse3,cx16
|
||||||
|
arch=NHM Core2 sse4.1,sse4.2,popcnt
|
||||||
|
arch=WSM NHM
|
||||||
|
arch=SNB WSM avx
|
||||||
|
arch=IVB SNB f16c,rdrnd,fsgsbase
|
||||||
|
arch=HSW IVB avx2,fma,bmi,bmi2,lzcnt,movbe
|
||||||
|
arch=BDW HSW adx,rdseed
|
||||||
|
arch=BDX BDW
|
||||||
|
arch=SKL BDW xsavec,xsaves
|
||||||
|
arch=ADL SKL avxvnni,gfni,vaes,vpclmulqdq,serialize,shstk,cldemote,movdiri,movdir64b,ibt,waitpkg,keylocker rdpid
|
||||||
|
arch=SKX SKL avx512f,avx512dq,avx512cd,avx512bw,avx512vl clwb
|
||||||
|
arch=CLX SKX avx512vnni
|
||||||
|
arch=CPX CLX avx512bf16
|
||||||
|
arch=CNL SKX avx512ifma,avx512vbmi sha
|
||||||
|
arch=ICL CNL avx512vbmi2,gfni,vaes,vpclmulqdq,avx512vnni,avx512bitalg,avx512vpopcntdq fsrm,rdpid
|
||||||
|
arch=ICX ICL pconfig
|
||||||
|
arch=TGL ICL avx512vp2intersect,shstk,movdiri,movdir64b,ibt,keylocker
|
||||||
|
arch=SPR TGL avx512bf16,amxtile,amxbf16,amxint8,avxvnni,cldemote,pconfig,waitpkg,serialize,tsxldtrk,uintr
|
||||||
|
# Atom line
|
||||||
|
arch=SLM WSM rdrnd,movbe
|
||||||
|
arch=GLM SLM fsgsbase,rdseed,lzcnt,xsavec,xsaves
|
||||||
|
arch=TNT GLM clwb,gfni,cldemote,waitpkg,movdiri,movdir64b
|
||||||
|
# Xeon Phi line
|
||||||
|
#arch=KNL SKL avx512f,avx512er,avx512pf,avx512cd
|
||||||
|
#arch=KNM KNL avx5124fmaps,avx5124vnniw,avx512vpopcntdq
|
||||||
|
# Longer names
|
||||||
|
arch=Nehalem NHM # Intel Core i3/i5/i7
|
||||||
|
arch=Westmere WSM # Intel Core i3/i5/i7
|
||||||
|
arch=SandyBridge SNB # Second Generation Intel Core i3/i5/i7
|
||||||
|
arch=IvyBridge IVB # Third Generation Intel Core i3/i5/i7
|
||||||
|
arch=Haswell HSW # Fourth Generation Intel Core i3/i5/i7
|
||||||
|
arch=Broadwell BDW # Fifth Generation Intel Core i3/i5/i7
|
||||||
|
arch=Skylake SKL # Sixth Generation Intel Core i3/i5/i7
|
||||||
|
arch=Skylake-Avx512 SKX # Intel Xeon Scalable
|
||||||
|
arch=CascadeLake CLX # Second Generation Intel Xeon Scalable
|
||||||
|
arch=CooperLake CPX # Third Generation Intel Xeon Scalable
|
||||||
|
arch=CannonLake CNL # Intel Core i3-8121U
|
||||||
|
arch=IceLake-Client ICL # Tenth Generation Intel Core i3/i5/i7
|
||||||
|
arch=IceLake-Server ICX # Third Generation Intel Xeon Scalable
|
||||||
|
arch=AlderLake ADL
|
||||||
|
arch=SapphireRapids SPR
|
||||||
|
arch=TigerLake TGL # Eleventh Generation Intel Core i3/i5/i7
|
||||||
|
arch=Silvermont SLM
|
||||||
|
arch=Goldmont GLM
|
||||||
|
arch=Tremont TNT
|
||||||
|
#arch=KnightsLanding KNL
|
||||||
|
#arch=KnightsMill KNM
|
329
util/x86simdgen/3rdparty/x86simd_generate.pl
vendored
Executable file
329
util/x86simdgen/3rdparty/x86simd_generate.pl
vendored
Executable file
@ -0,0 +1,329 @@
|
|||||||
|
#!/usr/bin/env perl
|
||||||
|
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
use strict;
|
||||||
|
$\ = "\n";
|
||||||
|
$/ = "\n";
|
||||||
|
my $debug = 0;
|
||||||
|
my %leaves = (
|
||||||
|
Leaf01ECX => "CPUID Leaf 1, ECX",
|
||||||
|
Leaf07_00EBX => "CPUID Leaf 7, Sub-leaf 0, EBX",
|
||||||
|
Leaf07_00ECX => "CPUID Leaf 7, Sub-leaf 0, ECX",
|
||||||
|
Leaf07_00EDX => "CPUID Leaf 7, Sub-leaf 0, EDX",
|
||||||
|
Leaf07_01EAX => "CPUID Leaf 7, Sub-leaf 1, EAX",
|
||||||
|
Leaf13_01EAX => "CPUID Leaf 13, Sub-leaf 1, EAX",
|
||||||
|
Leaf80000001hECX => "CPUID Leaf 80000001h, ECX",
|
||||||
|
Leaf80000008hEBX => "CPUID Leaf 80000008h, EBX",
|
||||||
|
);
|
||||||
|
my @leafNames = sort keys %leaves;
|
||||||
|
|
||||||
|
# out of order (we want it first)
|
||||||
|
unshift @leafNames, "Leaf01EDX";
|
||||||
|
$leaves{Leaf01EDX} = "CPUID Leaf 1, EDX";
|
||||||
|
|
||||||
|
# Read input from file specified by first argument
|
||||||
|
my $input_conf_file = shift @ARGV;
|
||||||
|
open(FH, '<', $input_conf_file) or die $!;
|
||||||
|
|
||||||
|
my $i = 0;
|
||||||
|
my @features;
|
||||||
|
my @architecture_names;
|
||||||
|
my %architectures;
|
||||||
|
my @xsaveStates;
|
||||||
|
my $maxarchnamelen = 0;
|
||||||
|
# Parse the configuration one line at a time. Three kinds of entries exist:
#   arch=NAME BASE FEATURES      architecture definition
#   xsave=NAME VALUE FEATURES    XSAVE state definition
#   NAME LEAF BIT [DEPENDS]      CPU feature definition
# Text after a '#' is kept as the entry's comment and then stripped.
while (<FH>) {
    chomp $_;
    # List assignment: $comment stays undefined when the line has no '#'.
    my ($comment) = m/#\s*(.*)\s*/;

    s/#.*$//;
    s/^\s+//;
    next if $_ eq "";

    if (s/^arch=//) {
        my ($arch, $based, $f) = split /\s+/;
        # "<>" marks an architecture without a base; any other base must
        # have been declared on an earlier line.
        die("Unknown base architecture \"$based\"")
            unless $based eq "<>" or grep {$_ eq $based} @architecture_names;
        my $id = lc($arch);
        $id =~ s/[^A-Za-z0-9_]/_/g;

        # Human-readable name: split CamelCase words and turn "-Suffix"
        # into " (Suffix)".
        my $prettyname = $arch;
        $prettyname =~ s/\B([A-Z])/ $1/g;
        $prettyname =~ s/-(\w+)/ ($1)/g;
        $maxarchnamelen = length($prettyname) if length($prettyname) > $maxarchnamelen;

        my @basefeatures;
        my @extrafeatures;
        @basefeatures = @{$architectures{$based}->{allfeatures}} if $based ne "<>";
        # A repeated arch= line for the same name extends its feature list.
        @extrafeatures = @{$architectures{$arch}{features}} if defined($architectures{$arch});
        @extrafeatures = (@extrafeatures, split(',', $f));
        my @allfeatures = sort (@basefeatures, @extrafeatures);

        $architectures{$arch} = {
            name => $arch,
            prettyname => $prettyname,
            id => $id,
            base => $based,
            features => \@extrafeatures,
            allfeatures => \@allfeatures,
            comment => $comment
        };
        push @architecture_names, $arch
            unless grep {$_ eq $arch} @architecture_names;
    } elsif (s/^xsave=//) {
        my ($name, $value, $required) = split /\s+/;
        push @xsaveStates,
            { id => $name, value => $value, required_for => $required, comment => $comment };
    } else {
        my ($name, $function, $bit, $depends) = split /\s+/;
        die("Unknown CPUID function \"$function\"")
            unless grep {$_ eq $function} @leafNames;
        if (my @match = grep { $_->{name} eq $name } @features) {
            die("internal error") if scalar @match != 1;
            # The CPUID function is stored under the "leaf" key (see the
            # push below), so that is the key to compare and report.
            # An identical re-declaration is silently accepted.
            next if $match[0]->{leaf} eq $function &&
                $match[0]->{bit} eq $bit && $match[0]->{depends} eq $depends;
            die("Duplicate feature \"$name\" with different details. " .
                "Previously was $match[0]->{leaf} bit $match[0]->{bit}.");
        }

        my $id = uc($name);
        $id =~ s/[^A-Z0-9_]/_/g;
        push @features,
            { name => $name, depends => $depends, id => $id, bit => $bit, leaf => $function, comment => $comment };
        # Each feature takes one bit of a 64-bit mask.
        ++$i;
        die("Too many features to fit a 64-bit integer") if $i > 64;
    }
}
|
||||||
|
close FH;
|
||||||
|
|
||||||
|
# Print the header output
|
||||||
|
my $headername = "";
|
||||||
|
my $headerguard = "";
|
||||||
|
if ($headername = shift @ARGV) {
|
||||||
|
|
||||||
|
$headerguard = uc($headername);
|
||||||
|
$headerguard =~ s/[^A-Z0-9_]/_/g;
|
||||||
|
|
||||||
|
print qq|// This is a generated file. DO NOT EDIT.
|
||||||
|
// Please see $0
|
||||||
|
#ifndef $headerguard
|
||||||
|
#define $headerguard
|
||||||
|
|
||||||
|
#include <stdint.h>|;
|
||||||
|
} else {
|
||||||
|
$debug = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
# Print the feature list
|
||||||
|
my $lastleaf;
|
||||||
|
for (my $i = 0; $i < scalar @features; ++$i) {
|
||||||
|
my $feature = $features[$i];
|
||||||
|
# Leaf header:
|
||||||
|
printf "\n// in %s:\n", $leaves{$feature->{leaf}}
|
||||||
|
if $feature->{leaf} ne $lastleaf;
|
||||||
|
$lastleaf = $feature->{leaf};
|
||||||
|
|
||||||
|
# Feature
|
||||||
|
printf "#define cpu_feature_%-31s (UINT64_C(1) << %d)\n", lc($feature->{id}), $i;
|
||||||
|
|
||||||
|
# Feature string names for Clang and GCC
|
||||||
|
my $str = $feature->{name} . ',' . $feature->{depends};
|
||||||
|
$str =~ s/,$//;
|
||||||
|
printf "#define QT_FUNCTION_TARGET_STRING_%-17s \"%s\"\n",
|
||||||
|
$feature->{id}, $str;
|
||||||
|
}
|
||||||
|
|
||||||
|
# Print the architecture list
|
||||||
|
print "\n// CPU architectures";
|
||||||
|
for (@architecture_names) {
|
||||||
|
my $arch = $architectures{$_};
|
||||||
|
my $base = $arch->{base};
|
||||||
|
if ($base eq "<>") {
|
||||||
|
$base = "0";
|
||||||
|
} else {
|
||||||
|
$base =~ s/[^A-Za-z0-9_]/_/g;
|
||||||
|
$base = "cpu_" . $base;
|
||||||
|
}
|
||||||
|
|
||||||
|
printf "#define cpu_%-19s (%s", lc($arch->{id}), lc($base);
|
||||||
|
|
||||||
|
for my $f (@{$arch->{features}}) {
|
||||||
|
my @match = grep { $_->{name} eq $f } @features;
|
||||||
|
if (scalar @match == 1) {
|
||||||
|
printf " \\\n%33s| cpu_feature_%s", " ", lc($match[0]->{id});
|
||||||
|
} else {
|
||||||
|
printf STDERR "%s: unknown feature '%s' for CPU '%s'\n", $0, $f, $arch->{name}
|
||||||
|
if $debug;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
print ")";
|
||||||
|
}
|
||||||
|
|
||||||
|
print q{
|
||||||
|
static const uint64_t _compilerCpuFeatures = 0};
|
||||||
|
|
||||||
|
# And print the compiler-enabled features part:
|
||||||
|
for (my $i = 0; $i < scalar @features; ++$i) {
|
||||||
|
my $feature = $features[$i];
|
||||||
|
printf
|
||||||
|
"#ifdef __%s__\n" .
|
||||||
|
" | cpu_feature_%s\n" .
|
||||||
|
"#endif\n",
|
||||||
|
$feature->{id}, lc($feature->{id});
|
||||||
|
}
|
||||||
|
|
||||||
|
print ' ;';
|
||||||
|
if ($headerguard ne "") {
|
||||||
|
print q|
|
||||||
|
#if (defined __cplusplus) && __cplusplus >= 201103L
|
||||||
|
enum X86CpuFeatures : uint64_t {|;
|
||||||
|
|
||||||
|
for (@features) {
|
||||||
|
my $line = sprintf "CpuFeature%s = cpu_feature_%s,", $_->{id}, lc($_->{id});
|
||||||
|
if ($_->{comment} ne "") {
|
||||||
|
printf " %-56s ///< %s\n", $line, $_->{comment};
|
||||||
|
} else {
|
||||||
|
print " $line";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
print qq|}; // enum X86CpuFeatures
|
||||||
|
|
||||||
|
enum X86CpuArchitectures : uint64_t {|;
|
||||||
|
|
||||||
|
for (@architecture_names) {
|
||||||
|
my $arch = $architectures{$_};
|
||||||
|
my $name = $arch->{name};
|
||||||
|
$name =~ s/[^A-Za-z0-9]//g;
|
||||||
|
my $line = sprintf "CpuArch%s = cpu_%s,", $name, lc($arch->{id});
|
||||||
|
if ($arch->{comment} ne "") {
|
||||||
|
printf " %-56s ///< %s\n", $line, $arch->{comment};
|
||||||
|
} else {
|
||||||
|
print " $line";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
print qq|}; // enum X86cpuArchitectures
|
||||||
|
#endif /* C++11 */\n|;
|
||||||
|
};
|
||||||
|
|
||||||
|
print "// -- implementation start --\n";
|
||||||
|
# Now generate the string table and bit-location array
|
||||||
|
my $offset = 0;
|
||||||
|
my @offsets;
|
||||||
|
print "static const char features_string[] =";
|
||||||
|
for my $feature (@features) {
|
||||||
|
print " \" $feature->{name}\\0\"";
|
||||||
|
push @offsets, $offset;
|
||||||
|
$offset += 2 + length($feature->{name});
|
||||||
|
}
|
||||||
|
print " \"\\0\";";
|
||||||
|
|
||||||
|
# Print the string offset table
|
||||||
|
printf "\nstatic const %s features_indices[] = {",
|
||||||
|
$offset > 255 ? "uint16_t" : "uint8_t";
|
||||||
|
for (my $j = 0; $j < scalar @offsets; ++$j) {
|
||||||
|
printf "%s%3d,",
|
||||||
|
$j % 8 ? " " : "\n ", $offsets[$j];
|
||||||
|
}
|
||||||
|
print "\n};";
|
||||||
|
|
||||||
|
# Print the locator enum and table
|
||||||
|
print "\nenum X86CpuidLeaves {";
|
||||||
|
map { print " $_," } @leafNames;
|
||||||
|
print " X86CpuidMaxLeaf\n};";
|
||||||
|
|
||||||
|
# Each locator is emitted as "<leaf>*32 + <bit>", so with more than eight
# leaves the value no longer fits in 8 bits. Count the keys explicitly:
# a bare hash in scalar context only yields the key count on Perl >= 5.26.
my $type = scalar(keys %leaves) > 8 ? "uint16_t" : "uint8_t";
printf "\nstatic const %s x86_locators[] = {\n", $type;
|
||||||
|
for (my $j = 0; $j < scalar @features; ++$j) {
|
||||||
|
my $feature = $features[$j];
|
||||||
|
printf " %s*32 + %2d, %s// %s\n",
|
||||||
|
$feature->{leaf}, $feature->{bit}, ' ' x (24 - length($feature->{leaf})), $feature->{name};
|
||||||
|
}
|
||||||
|
print '};';
|
||||||
|
|
||||||
|
# Generate the processor name listing, sorted by feature length
|
||||||
|
my %sorted_archs;
|
||||||
|
for (@architecture_names) {
|
||||||
|
my $arch = $architectures{$_};
|
||||||
|
my $key = sprintf "%02d_%s", scalar(@{$arch->{allfeatures}}), join(',', @{$arch->{allfeatures}});
|
||||||
|
$sorted_archs{$key} = $arch;
|
||||||
|
}
|
||||||
|
print qq|
|
||||||
|
struct X86Architecture
|
||||||
|
{
|
||||||
|
uint64_t features;
|
||||||
|
char name[$maxarchnamelen + 1];
|
||||||
|
};
|
||||||
|
|
||||||
|
static const struct X86Architecture x86_architectures[] = {|;
|
||||||
|
# Emit architectures with the largest feature count first. Keys look like
# "NN_feature,feature,...", so the numeric comparison uses the leading
# count; the string comparison breaks ties so that the generated table does
# not depend on Perl's hash iteration order.
for (sort { ($b <=> $a) || ($a cmp $b) } keys %sorted_archs) {
    my $arch = $sorted_archs{$_};
    # Architectures declared with base "<>" are not listed.
    next if $arch->{base} eq "<>";
    printf " { cpu_%s, \"%s\" },\n", $arch->{id}, $arch->{prettyname};
}
|
||||||
|
print "};";
|
||||||
|
|
||||||
|
# Produce the list of XSAVE states
|
||||||
|
print "\nenum XSaveBits {";
|
||||||
|
my $xsaveEnumPrefix = "XSave_";
|
||||||
|
for my $state (@xsaveStates) {
|
||||||
|
my $value = $state->{value};
|
||||||
|
unless ($value =~ /^0x/) {
|
||||||
|
# Compound value
|
||||||
|
$value = join(" | ", map { $xsaveEnumPrefix . $_ } split(/\|/, $value));
|
||||||
|
}
|
||||||
|
printf " %s%-12s = %s,", $xsaveEnumPrefix, $state->{id}, $value;
|
||||||
|
printf "%s// %s", ' ' x (18 - length($value)), $state->{comment}
|
||||||
|
if $state->{comment} ne '';
|
||||||
|
printf "\n";
|
||||||
|
};
|
||||||
|
print "};";
|
||||||
|
|
||||||
|
# Produce a list of features that require extended XSAVE state
|
||||||
|
my $xsaveRequirementMapping;
|
||||||
|
for my $state (@xsaveStates) {
|
||||||
|
my $xsaveReqPrefix = "XSaveReq_";
|
||||||
|
my @required_for = split /,/, $state->{required_for};
|
||||||
|
next unless scalar @required_for;
|
||||||
|
|
||||||
|
my $prefix = sprintf "\n// List of features requiring %s%s\nstatic const uint64_t %s%s = 0",
|
||||||
|
$xsaveEnumPrefix, $state->{id}, $xsaveReqPrefix, $state->{id};
|
||||||
|
|
||||||
|
# match either the feature name or one of its requirements against list
|
||||||
|
# of features that this state is required for
|
||||||
|
for my $feature (@features) {
|
||||||
|
my $id = lc($feature->{id});
|
||||||
|
my $required = 0;
|
||||||
|
for my $requirement (@required_for) {
|
||||||
|
my @depends = split /,/, "$id," . $feature->{depends};
|
||||||
|
$required = grep { $_ eq $requirement } @depends;
|
||||||
|
last if $required;
|
||||||
|
}
|
||||||
|
printf "$prefix\n | cpu_feature_%s", $id if $required;
|
||||||
|
$prefix = "" if $required;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ($prefix eq "") {
|
||||||
|
# we printed something
|
||||||
|
print ";";
|
||||||
|
$xsaveRequirementMapping .= sprintf " { %s%s, %s%s },\n",
|
||||||
|
$xsaveReqPrefix, $state->{id}, $xsaveEnumPrefix, $state->{id};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Finally, make a table
|
||||||
|
# The requirement table is only emitted when at least one XSAVE state
# mapped to a known feature.
if ($xsaveRequirementMapping ne "") {
    printf qq|
struct XSaveRequirementMapping
{
uint64_t cpu_features;
uint64_t xsave_state;
};

static const struct XSaveRequirementMapping xsave_requirements[] = {
%s};
|, $xsaveRequirementMapping;
}

# The end marker must always be present: the Makefile's sed expressions use
# it to split the output into header and implementation parts.
print "\n// -- implementation end --";

# Close the include guard only if one was opened, i.e. a header name was
# given on the command line.
print "#endif /* $headerguard */" if $headerguard ne "";
|
19
util/x86simdgen/Makefile
Normal file
19
util/x86simdgen/Makefile
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
# Regenerates the x86 SIMD feature files under $(TARGETDIR) from the feature
# descriptions listed in $(CONF_FILES).

GENERATOR = 3rdparty/x86simd_generate.pl
TARGETDIR = ../../src/corelib/global/
TARGETCPP = qsimd_x86.cpp
TARGETHEADER = qsimd_x86_p.h

CONF_FILES = 3rdparty/simd-intel.conf
# We don't currently use any feature from simd-amd.conf
# CONF_FILES += 3rdparty/simd-amd.conf

# "all" and "clean" name actions, not files: declare them phony so that a
# stray file with either name cannot make the target look up to date.
.PHONY: all clean

all: $(TARGETDIR)/$(TARGETHEADER) $(TARGETDIR)/$(TARGETCPP)

# Intermediate file in this directory holding the complete generator output.
# The generator script is an order-only prerequisite.
$(TARGETHEADER): $(CONF_FILES) | $(GENERATOR)
	cat $^ | perl $(GENERATOR) /dev/stdin $@ > $@

# Final header: the license header followed by the generator output, with
# everything between the two implementation markers deleted.
$(TARGETDIR)/$(TARGETHEADER): header $(TARGETHEADER)
	sed '/-- implementation start --/,/-- implementation end --/d' $^ > $@

# Final source: the license header, an #include of the generated header, and
# only the part of the generator output between the implementation markers.
$(TARGETDIR)/$(TARGETCPP): $(TARGETHEADER) header
	(cat header; echo '#include "$(TARGETHEADER)"'; sed '1,/-- implementation start --/d;/-- implementation end --/,$$d' $<) > $@

clean:
	-$(RM) $(TARGETHEADER)
|
13
util/x86simdgen/README.md
Normal file
13
util/x86simdgen/README.md
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
# Scripts to regenerate the x86 SIMD flags
|
||||||
|
|
||||||
|
Upstream: https://github.com/opendcdiag/opendcdiag
|
||||||
|
License: Apache-2.0
|
||||||
|
|
||||||
|
The .conf files are meant to be edited so that the options we want to use
|
||||||
|
are uncommented.
|
||||||
|
|
||||||
|
To regenerate:
|
||||||
|
make
|
||||||
|
|
||||||
|
Note: the license of the script does not affect the produced output's
|
||||||
|
license. Therefore, no qt_attribution.json file is provided.
|
@ -1,277 +0,0 @@
|
|||||||
#!/usr/bin/env perl
|
|
||||||
#############################################################################
|
|
||||||
##
|
|
||||||
## Copyright (C) 2018 Intel Corporation.
|
|
||||||
## Contact: https://www.qt.io/licensing/
|
|
||||||
##
|
|
||||||
## This file is part of the build configuration tools of the Qt Toolkit.
|
|
||||||
##
|
|
||||||
## $QT_BEGIN_LICENSE:MIT$
|
|
||||||
## Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
||||||
## of this software and associated documentation files (the "Software"), to deal
|
|
||||||
## in the Software without restriction, including without limitation the rights
|
|
||||||
## to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
||||||
## copies of the Software, and to permit persons to whom the Software is
|
|
||||||
## furnished to do so, subject to the following conditions:
|
|
||||||
##
|
|
||||||
## The above copyright notice and this permission notice shall be included in
|
|
||||||
## all copies or substantial portions of the Software.
|
|
||||||
##
|
|
||||||
## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
||||||
## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
||||||
## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
||||||
## AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
||||||
## LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
||||||
## OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
||||||
## THE SOFTWARE.
|
|
||||||
## $QT_END_LICENSE$
|
|
||||||
##
|
|
||||||
#############################################################################
|
|
||||||
|
|
||||||
use strict;
|
|
||||||
$\ = "\n";
|
|
||||||
$/ = "\n";
|
|
||||||
my %leaves = (
|
|
||||||
Leaf1EDX => "CPUID Leaf 1, EDX",
|
|
||||||
Leaf1ECX => "CPUID Leaf 1, ECX",
|
|
||||||
Leaf7_0EBX => "CPUID Leaf 7, Sub-leaf 0, EBX",
|
|
||||||
Leaf7_0ECX => "CPUID Leaf 7, Sub-leaf 0, ECX",
|
|
||||||
Leaf7_0EDX => "CPUID Leaf 7, Sub-leaf 0, EDX",
|
|
||||||
);
|
|
||||||
my @leafNames = sort keys %leaves;
|
|
||||||
|
|
||||||
# Read data from stdin
|
|
||||||
my $i = 1;
|
|
||||||
my @features;
|
|
||||||
# Each non-empty input line describes one feature as whitespace-separated
# fields: name, CPUID leaf, bit number and an optional required feature.
# Anything from '#' to the end of the line is a comment.
while (<STDIN>) {
    s/#.*$//;
    chomp;
    next if $_ eq "";

    my ($name, $function, $bit, $depends) = split /\s+/;

    # grep must test each element ($_) against the leaf name; a bare
    # "$function" expression is simply true for any non-empty name, which
    # would let unknown leaves slip through unnoticed.
    die("Unknown CPUID function \"$function\"")
        unless grep { $_ eq $function } @leafNames;

    # Derive an identifier from the feature name: upper-case it and replace
    # every character outside [A-Z0-9_] with an underscore.
    my $id = uc($name);
    $id =~ s/[^A-Z0-9_]/_/g;
    push @features,
        { name => $name, depends => $depends, id => $id, bit => $bit, leaf => $function };
    ++$i;
}
|
|
||||||
|
|
||||||
if (my $h = shift @ARGV) {
|
|
||||||
open HEADER, ">", $h;
|
|
||||||
select HEADER;
|
|
||||||
}
|
|
||||||
|
|
||||||
# Print the qsimd_x86_p.h output
|
|
||||||
print q{/****************************************************************************
|
|
||||||
**
|
|
||||||
** Copyright (C) 2018 Intel Corporation.
|
|
||||||
** Contact: https://www.qt.io/licensing/
|
|
||||||
**
|
|
||||||
** This file is part of the QtCore module of the Qt Toolkit.
|
|
||||||
**
|
|
||||||
** $QT_BEGIN_LICENSE:LGPL$
|
|
||||||
** Commercial License Usage
|
|
||||||
** Licensees holding valid commercial Qt licenses may use this file in
|
|
||||||
** accordance with the commercial license agreement provided with the
|
|
||||||
** Software or, alternatively, in accordance with the terms contained in
|
|
||||||
** a written agreement between you and The Qt Company. For licensing terms
|
|
||||||
** and conditions see https://www.qt.io/terms-conditions. For further
|
|
||||||
** information use the contact form at https://www.qt.io/contact-us.
|
|
||||||
**
|
|
||||||
** GNU Lesser General Public License Usage
|
|
||||||
** Alternatively, this file may be used under the terms of the GNU Lesser
|
|
||||||
** General Public License version 3 as published by the Free Software
|
|
||||||
** Foundation and appearing in the file LICENSE.LGPL3 included in the
|
|
||||||
** packaging of this file. Please review the following information to
|
|
||||||
** ensure the GNU Lesser General Public License version 3 requirements
|
|
||||||
** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
|
|
||||||
**
|
|
||||||
** GNU General Public License Usage
|
|
||||||
** Alternatively, this file may be used under the terms of the GNU
|
|
||||||
** General Public License version 2.0 or (at your option) the GNU General
|
|
||||||
** Public license version 3 or any later version approved by the KDE Free
|
|
||||||
** Qt Foundation. The licenses are as published by the Free Software
|
|
||||||
** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
|
|
||||||
** included in the packaging of this file. Please review the following
|
|
||||||
** information to ensure the GNU General Public License requirements will
|
|
||||||
** be met: https://www.gnu.org/licenses/gpl-2.0.html and
|
|
||||||
** https://www.gnu.org/licenses/gpl-3.0.html.
|
|
||||||
**
|
|
||||||
** $QT_END_LICENSE$
|
|
||||||
**
|
|
||||||
****************************************************************************/
|
|
||||||
|
|
||||||
// This is a generated file. DO NOT EDIT.
|
|
||||||
// Please see util/x86simdgen/generate.pl";
|
|
||||||
#ifndef QSIMD_P_H
|
|
||||||
# error "Please include <private/qsimd_p.h> instead"
|
|
||||||
#endif
|
|
||||||
#ifndef QSIMD_X86_P_H
|
|
||||||
#define QSIMD_X86_P_H
|
|
||||||
|
|
||||||
#include "qsimd_p.h"
|
|
||||||
|
|
||||||
//
|
|
||||||
// W A R N I N G
|
|
||||||
// -------------
|
|
||||||
//
|
|
||||||
// This file is not part of the Qt API. It exists purely as an
|
|
||||||
// implementation detail. This header file may change from version to
|
|
||||||
// version without notice, or even be removed.
|
|
||||||
//
|
|
||||||
// We mean it.
|
|
||||||
//
|
|
||||||
|
|
||||||
QT_BEGIN_NAMESPACE
|
|
||||||
|
|
||||||
// used only to indicate that the CPU detection was initialized
|
|
||||||
#define QSimdInitialized (Q_UINT64_C(1) << 0)};
|
|
||||||
|
|
||||||
# Print the enum
|
|
||||||
my $lastleaf;
|
|
||||||
for (my $i = 0; $i < scalar @features; ++$i) {
|
|
||||||
my $feature = $features[$i];
|
|
||||||
# Leaf header:
|
|
||||||
printf "\n// in %s:\n", $leaves{$feature->{leaf}}
|
|
||||||
if $feature->{leaf} ne $lastleaf;
|
|
||||||
$lastleaf = $feature->{leaf};
|
|
||||||
|
|
||||||
# Feature
|
|
||||||
printf "#define CpuFeature%-33s (Q_UINT64_C(1) << %d)\n", $feature->{id}, $i + 1;
|
|
||||||
|
|
||||||
# Feature string names for Clang and GCC
|
|
||||||
my $str = $feature->{name};
|
|
||||||
$str .= ",$feature->{depends}" if defined($feature->{depends});
|
|
||||||
printf "#define QT_FUNCTION_TARGET_STRING_%-17s \"%s\"\n",
|
|
||||||
$feature->{id}, $str;
|
|
||||||
}
|
|
||||||
|
|
||||||
print q{
|
|
||||||
static const quint64 qCompilerCpuFeatures = 0};
|
|
||||||
|
|
||||||
# And print the compiler-enabled features part:
|
|
||||||
for (my $i = 0; $i < scalar @features; ++$i) {
|
|
||||||
my $feature = $features[$i];
|
|
||||||
printf
|
|
||||||
"#ifdef __%s__\n" .
|
|
||||||
" | CpuFeature%s\n" .
|
|
||||||
"#endif\n",
|
|
||||||
$feature->{id}, $feature->{id};
|
|
||||||
}
|
|
||||||
|
|
||||||
print q{ ;
|
|
||||||
|
|
||||||
QT_END_NAMESPACE
|
|
||||||
|
|
||||||
#endif // QSIMD_X86_P_H
|
|
||||||
};
|
|
||||||
|
|
||||||
if (my $cpp = shift @ARGV) {
|
|
||||||
open CPP, ">", $cpp;
|
|
||||||
select CPP;
|
|
||||||
} else {
|
|
||||||
print q{
|
|
||||||
|
|
||||||
---- cut here, paste the rest into qsimd_x86.cpp ---
|
|
||||||
|
|
||||||
|
|
||||||
};
|
|
||||||
};
|
|
||||||
|
|
||||||
print q{/****************************************************************************
|
|
||||||
**
|
|
||||||
** Copyright (C) 2018 Intel Corporation.
|
|
||||||
** Contact: https://www.qt.io/licensing/
|
|
||||||
**
|
|
||||||
** This file is part of the QtCore module of the Qt Toolkit.
|
|
||||||
**
|
|
||||||
** $QT_BEGIN_LICENSE:LGPL$
|
|
||||||
** Commercial License Usage
|
|
||||||
** Licensees holding valid commercial Qt licenses may use this file in
|
|
||||||
** accordance with the commercial license agreement provided with the
|
|
||||||
** Software or, alternatively, in accordance with the terms contained in
|
|
||||||
** a written agreement between you and The Qt Company. For licensing terms
|
|
||||||
** and conditions see https://www.qt.io/terms-conditions. For further
|
|
||||||
** information use the contact form at https://www.qt.io/contact-us.
|
|
||||||
**
|
|
||||||
** GNU Lesser General Public License Usage
|
|
||||||
** Alternatively, this file may be used under the terms of the GNU Lesser
|
|
||||||
** General Public License version 3 as published by the Free Software
|
|
||||||
** Foundation and appearing in the file LICENSE.LGPL3 included in the
|
|
||||||
** packaging of this file. Please review the following information to
|
|
||||||
** ensure the GNU Lesser General Public License version 3 requirements
|
|
||||||
** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
|
|
||||||
**
|
|
||||||
** GNU General Public License Usage
|
|
||||||
** Alternatively, this file may be used under the terms of the GNU
|
|
||||||
** General Public License version 2.0 or (at your option) the GNU General
|
|
||||||
** Public license version 3 or any later version approved by the KDE Free
|
|
||||||
** Qt Foundation. The licenses are as published by the Free Software
|
|
||||||
** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
|
|
||||||
** included in the packaging of this file. Please review the following
|
|
||||||
** information to ensure the GNU General Public License requirements will
|
|
||||||
** be met: https://www.gnu.org/licenses/gpl-2.0.html and
|
|
||||||
** https://www.gnu.org/licenses/gpl-3.0.html.
|
|
||||||
**
|
|
||||||
** $QT_END_LICENSE$
|
|
||||||
**
|
|
||||||
****************************************************************************/
|
|
||||||
|
|
||||||
// This is a generated file. DO NOT EDIT.
|
|
||||||
// Please see util/x86simdgen/generate.pl";
|
|
||||||
#include "qsimd_p.h"
|
|
||||||
};
|
|
||||||
|
|
||||||
# Now generate the string table and bit-location array
|
|
||||||
my $offset = 0;
|
|
||||||
my @offsets;
|
|
||||||
print "static const char features_string[] =";
|
|
||||||
for my $feature (@features) {
|
|
||||||
print " \" $feature->{name}\\0\"";
|
|
||||||
push @offsets, $offset;
|
|
||||||
$offset += 2 + length($feature->{name});
|
|
||||||
}
|
|
||||||
print " \"\\0\";";
|
|
||||||
|
|
||||||
# Print the string offset table
|
|
||||||
printf "\nstatic const %s features_indices[] = {\n %3d",
|
|
||||||
$offset > 255 ? "quint16" : "quint8", $offset;
|
|
||||||
for (my $j = 0; $j < scalar @offsets; ++$j) {
|
|
||||||
printf ",%s%3d",
|
|
||||||
($j + 1) % 8 ? " " : "\n ", $offsets[$j];
|
|
||||||
}
|
|
||||||
print "\n};";
|
|
||||||
|
|
||||||
# Print the locator enum and table
|
|
||||||
print "\nenum X86CpuidLeaves {";
|
|
||||||
map { print " $_," } @leafNames;
|
|
||||||
print " X86CpuidMaxLeaf\n};";
|
|
||||||
|
|
||||||
# Each locator below is emitted as "<leaf> * 32 + <bit>", so an 8-bit element
# type is only large enough while there are at most 8 leaves. Count the keys
# explicitly: before Perl 5.26 a hash in scalar context yields a bucket-usage
# string such as "5/8", not the number of keys.
my $type = scalar(keys %leaves) > 8 ? "quint16" : "quint8";
printf "\nstatic const %s x86_locators[] = {", $type;
|
|
||||||
my $lastname;
|
|
||||||
for (my $j = 0; $j < scalar @features; ++$j) {
|
|
||||||
my $feature = $features[$j];
|
|
||||||
printf ", // %s", $lastname
|
|
||||||
if defined($lastname);
|
|
||||||
printf "\n %s*32 + %2d",
|
|
||||||
$feature->{leaf}, $feature->{bit};
|
|
||||||
$lastname = $feature->{name};
|
|
||||||
}
|
|
||||||
printf qq{ // $lastname
|
|
||||||
\};
|
|
||||||
|
|
||||||
// List of AVX512 features (see detectProcessorFeatures())
|
|
||||||
static const quint64 AllAVX512 = 0};
|
|
||||||
|
|
||||||
# Print AVX512 features
|
|
||||||
for (my $j = 0; $j < scalar @features; ++$j) {
|
|
||||||
my $feature = $features[$j];
|
|
||||||
$_ = $feature->{id};
|
|
||||||
printf "\n | CpuFeature%s", $_ if /AVX512/;
|
|
||||||
}
|
|
||||||
print ";";
|
|
39
util/x86simdgen/header
Normal file
39
util/x86simdgen/header
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
/****************************************************************************
|
||||||
|
**
|
||||||
|
** Copyright (C) 2022 Intel Corporation.
|
||||||
|
** Contact: https://www.qt.io/licensing/
|
||||||
|
**
|
||||||
|
** This file is part of the QtCore module of the Qt Toolkit.
|
||||||
|
**
|
||||||
|
** $QT_BEGIN_LICENSE:LGPL$
|
||||||
|
** Commercial License Usage
|
||||||
|
** Licensees holding valid commercial Qt licenses may use this file in
|
||||||
|
** accordance with the commercial license agreement provided with the
|
||||||
|
** Software or, alternatively, in accordance with the terms contained in
|
||||||
|
** a written agreement between you and The Qt Company. For licensing terms
|
||||||
|
** and conditions see https://www.qt.io/terms-conditions. For further
|
||||||
|
** information use the contact form at https://www.qt.io/contact-us.
|
||||||
|
**
|
||||||
|
** GNU Lesser General Public License Usage
|
||||||
|
** Alternatively, this file may be used under the terms of the GNU Lesser
|
||||||
|
** General Public License version 3 as published by the Free Software
|
||||||
|
** Foundation and appearing in the file LICENSE.LGPL3 included in the
|
||||||
|
** packaging of this file. Please review the following information to
|
||||||
|
** ensure the GNU Lesser General Public License version 3 requirements
|
||||||
|
** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
|
||||||
|
**
|
||||||
|
** GNU General Public License Usage
|
||||||
|
** Alternatively, this file may be used under the terms of the GNU
|
||||||
|
** General Public License version 2.0 or (at your option) the GNU General
|
||||||
|
** Public license version 3 or any later version approved by the KDE Free
|
||||||
|
** Qt Foundation. The licenses are as published by the Free Software
|
||||||
|
** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
|
||||||
|
** included in the packaging of this file. Please review the following
|
||||||
|
** information to ensure the GNU General Public License requirements will
|
||||||
|
** be met: https://www.gnu.org/licenses/gpl-2.0.html and
|
||||||
|
** https://www.gnu.org/licenses/gpl-3.0.html.
|
||||||
|
**
|
||||||
|
** $QT_END_LICENSE$
|
||||||
|
**
|
||||||
|
****************************************************************************/
|
||||||
|
|
@ -1,37 +0,0 @@
|
|||||||
# Feature CPUID function Bit Required feature
|
|
||||||
sse2 Leaf1EDX 26
|
|
||||||
sse3 Leaf1ECX 0
|
|
||||||
ssse3 Leaf1ECX 9
|
|
||||||
fma Leaf1ECX 12
|
|
||||||
sse4.1 Leaf1ECX 19
|
|
||||||
sse4.2 Leaf1ECX 20
|
|
||||||
movbe Leaf1ECX 22
|
|
||||||
popcnt Leaf1ECX 23
|
|
||||||
aes Leaf1ECX 25 sse4.2
|
|
||||||
avx Leaf1ECX 28
|
|
||||||
f16c Leaf1ECX 29
|
|
||||||
rdrnd Leaf1ECX 30
|
|
||||||
bmi Leaf7_0EBX 3
|
|
||||||
hle Leaf7_0EBX 4
|
|
||||||
avx2 Leaf7_0EBX 5
|
|
||||||
bmi2 Leaf7_0EBX 8
|
|
||||||
rtm Leaf7_0EBX 11
|
|
||||||
avx512f Leaf7_0EBX 16
|
|
||||||
avx512dq Leaf7_0EBX 17
|
|
||||||
rdseed Leaf7_0EBX 18
|
|
||||||
avx512ifma Leaf7_0EBX 21
|
|
||||||
avx512pf Leaf7_0EBX 26
|
|
||||||
avx512er Leaf7_0EBX 27
|
|
||||||
avx512cd Leaf7_0EBX 28
|
|
||||||
sha Leaf7_0EBX 29
|
|
||||||
avx512bw Leaf7_0EBX 30
|
|
||||||
avx512vl Leaf7_0EBX 31
|
|
||||||
avx512vbmi Leaf7_0ECX 1
|
|
||||||
avx512vbmi2 Leaf7_0ECX 6
|
|
||||||
gfni Leaf7_0ECX 8
|
|
||||||
vaes Leaf7_0ECX 9
|
|
||||||
avx512vnni Leaf7_0ECX 11
|
|
||||||
avx512bitalg Leaf7_0ECX 12
|
|
||||||
avx512vpopcntdq Leaf7_0ECX 14
|
|
||||||
avx5124nniw Leaf7_0EDX 2
|
|
||||||
avx5124fmaps Leaf7_0EDX 3
|
|
Loading…
x
Reference in New Issue
Block a user