qsimd: update the generator script from OpenDCDiag
I'd been making changes to that and improving it for the past 2 years without bringing it back into Qt. The list of features is mostly the same, except:
- removed TSX features
- removed features specific to Xeon Phi processors
- added CET and AVX512FP16 features
- added the bit for hybrid CPU detection

See matching update at https://github.com/opendcdiag/opendcdiag/pull/49

Change-Id: I6fcda969a9e9427198bffffd16ce860b5a38aece
Reviewed-by: Allan Sandfeld Jensen <allan.jensen@qt.io>
This commit is contained in:
parent
05428d9b97
commit
b852584556
@ -350,22 +350,6 @@ static void xgetbv(uint in, uint &eax, uint &edx)
|
||||
#endif
|
||||
}
|
||||
|
||||
// Flags from the XCR0 state register
|
||||
enum XCR0Flags {
|
||||
X87 = 1 << 0,
|
||||
XMM0_15 = 1 << 1,
|
||||
YMM0_15Hi128 = 1 << 2,
|
||||
BNDRegs = 1 << 3,
|
||||
BNDCSR = 1 << 4,
|
||||
OpMask = 1 << 5,
|
||||
ZMM0_15Hi256 = 1 << 6,
|
||||
ZMM16_31 = 1 << 7,
|
||||
|
||||
SSEState = XMM0_15,
|
||||
AVXState = XMM0_15 | YMM0_15Hi128,
|
||||
AVX512State = AVXState | OpMask | ZMM0_15Hi256 | ZMM16_31
|
||||
};
|
||||
|
||||
QT_FUNCTION_TARGET_BASELINE
|
||||
static quint64 adjustedXcr0(quint64 xcr0)
|
||||
{
|
||||
@ -386,7 +370,7 @@ static quint64 adjustedXcr0(quint64 xcr0)
|
||||
constexpr quintptr cpu_capabilities64 = commpage + 0x10;
|
||||
quint64 capab = *reinterpret_cast<quint64 *>(cpu_capabilities64);
|
||||
if (capab & kHasAVX512F)
|
||||
xcr0 |= AVX512State;
|
||||
xcr0 |= XSave_Avx512State;
|
||||
#endif
|
||||
|
||||
return xcr0;
|
||||
@ -395,9 +379,6 @@ static quint64 adjustedXcr0(quint64 xcr0)
|
||||
QT_FUNCTION_TARGET_BASELINE
|
||||
static quint64 detectProcessorFeatures()
|
||||
{
|
||||
static const quint64 AllAVX = AllAVX512 | CpuFeatureAVX | CpuFeatureAVX2 | CpuFeatureF16C
|
||||
| CpuFeatureFMA | CpuFeatureVAES;
|
||||
|
||||
quint64 features = 0;
|
||||
int cpuidLevel = maxBasicCpuidSupported();
|
||||
#if Q_PROCESSOR_X86 < 5
|
||||
@ -408,38 +389,35 @@ static quint64 detectProcessorFeatures()
|
||||
#endif
|
||||
|
||||
uint results[X86CpuidMaxLeaf] = {};
|
||||
cpuidFeatures01(results[Leaf1ECX], results[Leaf1EDX]);
|
||||
cpuidFeatures01(results[Leaf01ECX], results[Leaf01EDX]);
|
||||
if (cpuidLevel >= 7)
|
||||
cpuidFeatures07_00(results[Leaf7_0EBX], results[Leaf7_0ECX], results[Leaf7_0EDX]);
|
||||
cpuidFeatures07_00(results[Leaf07_00EBX], results[Leaf07_00ECX], results[Leaf07_00EDX]);
|
||||
|
||||
// populate our feature list
|
||||
for (uint i = 0; i < sizeof(x86_locators) / sizeof(x86_locators[0]); ++i) {
|
||||
for (uint i = 0; i < std::size(x86_locators); ++i) {
|
||||
uint word = x86_locators[i] / 32;
|
||||
uint bit = 1U << (x86_locators[i] % 32);
|
||||
quint64 feature = Q_UINT64_C(1) << (i + 1);
|
||||
quint64 feature = Q_UINT64_C(1) << i;
|
||||
if (results[word] & bit)
|
||||
features |= feature;
|
||||
}
|
||||
|
||||
// now check the AVX state
|
||||
quint64 xcr0 = 0;
|
||||
if (results[Leaf1ECX] & (1u << 27)) {
|
||||
if (results[Leaf01ECX] & (1u << 27)) {
|
||||
// XGETBV enabled
|
||||
uint xgetbvA = 0, xgetbvD = 0;
|
||||
xgetbv(0, xgetbvA, xgetbvD);
|
||||
|
||||
xcr0 = xgetbvA;
|
||||
if (sizeof(XCR0Flags) > sizeof(xgetbvA))
|
||||
if (sizeof(XSaveBits) > sizeof(xgetbvA))
|
||||
xcr0 |= quint64(xgetbvD) << 32;
|
||||
xcr0 = adjustedXcr0(xcr0);
|
||||
}
|
||||
|
||||
if ((xcr0 & AVXState) != AVXState) {
|
||||
// support for YMM registers is disabled, disable all AVX
|
||||
features &= ~AllAVX;
|
||||
} else if ((xcr0 & AVX512State) != AVX512State) {
|
||||
// support for ZMM registers or mask registers is disabled, disable all AVX512
|
||||
features &= ~AllAVX512;
|
||||
for (auto req : xsave_requirements) {
|
||||
if ((xcr0 & req.xsave_state) != req.xsave_state)
|
||||
features &= ~req.cpu_features;
|
||||
}
|
||||
|
||||
if (features & CpuFeatureRDRND && !checkRdrndWorks())
|
||||
|
@ -246,10 +246,12 @@ asm(
|
||||
# define __haswell__ 1
|
||||
# endif
|
||||
|
||||
QT_BEGIN_NAMESPACE
|
||||
static const quint64 qCompilerCpuFeatures = _compilerCpuFeatures;
|
||||
|
||||
// This constant does not include all CPU features found in a Haswell, only
|
||||
// those that we'd have optimized code for.
|
||||
// Note: must use Q_CONSTEXPR here, as this file may be compiled in C mode.
|
||||
QT_BEGIN_NAMESPACE
|
||||
static const quint64 CpuFeatureArchHaswell = 0
|
||||
| CpuFeatureSSE2
|
||||
| CpuFeatureSSE3
|
||||
|
@ -1,6 +1,6 @@
|
||||
/****************************************************************************
|
||||
**
|
||||
** Copyright (C) 2018 Intel Corporation.
|
||||
** Copyright (C) 2022 Intel Corporation.
|
||||
** Contact: https://www.qt.io/licensing/
|
||||
**
|
||||
** This file is part of the QtCore module of the Qt Toolkit.
|
||||
@ -37,9 +37,7 @@
|
||||
**
|
||||
****************************************************************************/
|
||||
|
||||
// This is a generated file. DO NOT EDIT.
|
||||
// Please see util/x86simdgen/generate.pl
|
||||
#include "qsimd_p.h"
|
||||
#include "qsimd_x86_p.h"
|
||||
|
||||
static const char features_string[] =
|
||||
" sse2\0"
|
||||
@ -55,101 +53,188 @@ static const char features_string[] =
|
||||
" f16c\0"
|
||||
" rdrnd\0"
|
||||
" bmi\0"
|
||||
" hle\0"
|
||||
" avx2\0"
|
||||
" bmi2\0"
|
||||
" rtm\0"
|
||||
" avx512f\0"
|
||||
" avx512dq\0"
|
||||
" rdseed\0"
|
||||
" avx512ifma\0"
|
||||
" avx512pf\0"
|
||||
" avx512er\0"
|
||||
" avx512cd\0"
|
||||
" sha\0"
|
||||
" avx512bw\0"
|
||||
" avx512vl\0"
|
||||
" avx512vbmi\0"
|
||||
" avx512vbmi2\0"
|
||||
" shstk\0"
|
||||
" gfni\0"
|
||||
" vaes\0"
|
||||
" avx512vnni\0"
|
||||
" avx512bitalg\0"
|
||||
" avx512vpopcntdq\0"
|
||||
" avx5124nniw\0"
|
||||
" avx5124fmaps\0"
|
||||
" hybrid\0"
|
||||
" ibt\0"
|
||||
" avx512fp16\0"
|
||||
"\0";
|
||||
|
||||
static const quint16 features_indices[] = {
|
||||
306, 0, 6, 12, 19, 24, 32, 40,
|
||||
47, 55, 60, 65, 71, 78, 83, 88,
|
||||
94, 100, 105, 114, 124, 132, 144, 154,
|
||||
164, 174, 179, 189, 199, 211, 224, 230,
|
||||
236, 248, 262, 279, 292
|
||||
static const uint16_t features_indices[] = {
|
||||
0, 6, 12, 19, 24, 32, 40, 47,
|
||||
55, 60, 65, 71, 78, 83, 89, 95,
|
||||
104, 114, 122, 134, 144, 149, 159, 169,
|
||||
181, 194, 201, 207, 213, 225, 239, 256,
|
||||
264, 269,
|
||||
};
|
||||
|
||||
enum X86CpuidLeaves {
|
||||
Leaf1ECX,
|
||||
Leaf1EDX,
|
||||
Leaf7_0EBX,
|
||||
Leaf7_0ECX,
|
||||
Leaf7_0EDX,
|
||||
Leaf01EDX,
|
||||
Leaf01ECX,
|
||||
Leaf07_00EBX,
|
||||
Leaf07_00ECX,
|
||||
Leaf07_00EDX,
|
||||
Leaf07_01EAX,
|
||||
Leaf13_01EAX,
|
||||
Leaf80000001hECX,
|
||||
Leaf80000008hEBX,
|
||||
X86CpuidMaxLeaf
|
||||
};
|
||||
|
||||
static const quint8 x86_locators[] = {
|
||||
Leaf1EDX*32 + 26, // sse2
|
||||
Leaf1ECX*32 + 0, // sse3
|
||||
Leaf1ECX*32 + 9, // ssse3
|
||||
Leaf1ECX*32 + 12, // fma
|
||||
Leaf1ECX*32 + 19, // sse4.1
|
||||
Leaf1ECX*32 + 20, // sse4.2
|
||||
Leaf1ECX*32 + 22, // movbe
|
||||
Leaf1ECX*32 + 23, // popcnt
|
||||
Leaf1ECX*32 + 25, // aes
|
||||
Leaf1ECX*32 + 28, // avx
|
||||
Leaf1ECX*32 + 29, // f16c
|
||||
Leaf1ECX*32 + 30, // rdrnd
|
||||
Leaf7_0EBX*32 + 3, // bmi
|
||||
Leaf7_0EBX*32 + 4, // hle
|
||||
Leaf7_0EBX*32 + 5, // avx2
|
||||
Leaf7_0EBX*32 + 8, // bmi2
|
||||
Leaf7_0EBX*32 + 11, // rtm
|
||||
Leaf7_0EBX*32 + 16, // avx512f
|
||||
Leaf7_0EBX*32 + 17, // avx512dq
|
||||
Leaf7_0EBX*32 + 18, // rdseed
|
||||
Leaf7_0EBX*32 + 21, // avx512ifma
|
||||
Leaf7_0EBX*32 + 26, // avx512pf
|
||||
Leaf7_0EBX*32 + 27, // avx512er
|
||||
Leaf7_0EBX*32 + 28, // avx512cd
|
||||
Leaf7_0EBX*32 + 29, // sha
|
||||
Leaf7_0EBX*32 + 30, // avx512bw
|
||||
Leaf7_0EBX*32 + 31, // avx512vl
|
||||
Leaf7_0ECX*32 + 1, // avx512vbmi
|
||||
Leaf7_0ECX*32 + 6, // avx512vbmi2
|
||||
Leaf7_0ECX*32 + 8, // gfni
|
||||
Leaf7_0ECX*32 + 9, // vaes
|
||||
Leaf7_0ECX*32 + 11, // avx512vnni
|
||||
Leaf7_0ECX*32 + 12, // avx512bitalg
|
||||
Leaf7_0ECX*32 + 14, // avx512vpopcntdq
|
||||
Leaf7_0EDX*32 + 2, // avx5124nniw
|
||||
Leaf7_0EDX*32 + 3 // avx5124fmaps
|
||||
static const uint16_t x86_locators[] = {
|
||||
Leaf01EDX*32 + 26, // sse2
|
||||
Leaf01ECX*32 + 0, // sse3
|
||||
Leaf01ECX*32 + 9, // ssse3
|
||||
Leaf01ECX*32 + 12, // fma
|
||||
Leaf01ECX*32 + 19, // sse4.1
|
||||
Leaf01ECX*32 + 20, // sse4.2
|
||||
Leaf01ECX*32 + 22, // movbe
|
||||
Leaf01ECX*32 + 23, // popcnt
|
||||
Leaf01ECX*32 + 25, // aes
|
||||
Leaf01ECX*32 + 28, // avx
|
||||
Leaf01ECX*32 + 29, // f16c
|
||||
Leaf01ECX*32 + 30, // rdrnd
|
||||
Leaf07_00EBX*32 + 3, // bmi
|
||||
Leaf07_00EBX*32 + 5, // avx2
|
||||
Leaf07_00EBX*32 + 8, // bmi2
|
||||
Leaf07_00EBX*32 + 16, // avx512f
|
||||
Leaf07_00EBX*32 + 17, // avx512dq
|
||||
Leaf07_00EBX*32 + 18, // rdseed
|
||||
Leaf07_00EBX*32 + 21, // avx512ifma
|
||||
Leaf07_00EBX*32 + 28, // avx512cd
|
||||
Leaf07_00EBX*32 + 29, // sha
|
||||
Leaf07_00EBX*32 + 30, // avx512bw
|
||||
Leaf07_00EBX*32 + 31, // avx512vl
|
||||
Leaf07_00ECX*32 + 1, // avx512vbmi
|
||||
Leaf07_00ECX*32 + 6, // avx512vbmi2
|
||||
Leaf07_00ECX*32 + 7, // shstk
|
||||
Leaf07_00ECX*32 + 8, // gfni
|
||||
Leaf07_00ECX*32 + 9, // vaes
|
||||
Leaf07_00ECX*32 + 11, // avx512vnni
|
||||
Leaf07_00ECX*32 + 12, // avx512bitalg
|
||||
Leaf07_00ECX*32 + 14, // avx512vpopcntdq
|
||||
Leaf07_00EDX*32 + 15, // hybrid
|
||||
Leaf07_00EDX*32 + 20, // ibt
|
||||
Leaf07_00EDX*32 + 23, // avx512fp16
|
||||
};
|
||||
|
||||
struct X86Architecture
|
||||
{
|
||||
uint64_t features;
|
||||
char name[17 + 1];
|
||||
};
|
||||
|
||||
static const struct X86Architecture x86_architectures[] = {
|
||||
{ cpu_sapphirerapids, "Sapphire Rapids" },
|
||||
{ cpu_tigerlake, "Tiger Lake" },
|
||||
{ cpu_icelake_server, "Ice Lake (Server)" },
|
||||
{ cpu_icelake_client, "Ice Lake (Client)" },
|
||||
{ cpu_alderlake, "Alder Lake" },
|
||||
{ cpu_cooperlake, "Cooper Lake" },
|
||||
{ cpu_cannonlake, "Cannon Lake" },
|
||||
{ cpu_cascadelake, "Cascade Lake" },
|
||||
{ cpu_skylake_avx512, "Skylake (Avx512)" },
|
||||
{ cpu_skylake, "Skylake" },
|
||||
{ cpu_tremont, "Tremont" },
|
||||
{ cpu_broadwell, "Broadwell" },
|
||||
{ cpu_haswell, "Haswell" },
|
||||
{ cpu_goldmont, "Goldmont" },
|
||||
{ cpu_ivybridge, "Ivy Bridge" },
|
||||
{ cpu_silvermont, "Silvermont" },
|
||||
{ cpu_sandybridge, "Sandy Bridge" },
|
||||
{ cpu_westmere, "Westmere" },
|
||||
{ cpu_core2, "Core2" },
|
||||
};
|
||||
|
||||
enum XSaveBits {
|
||||
XSave_X87 = 0x0001, // X87 and MMX state
|
||||
XSave_SseState = 0x0002, // SSE: 128 bits of XMM registers
|
||||
XSave_Ymm_Hi128 = 0x0004, // AVX: high 128 bits in YMM registers
|
||||
XSave_Bndregs = 0x0008, // Memory Protection Extensions
|
||||
XSave_Bndcsr = 0x0010, // Memory Protection Extensions
|
||||
XSave_OpMask = 0x0020, // AVX512: k0 through k7
|
||||
XSave_Zmm_Hi256 = 0x0040, // AVX512: high 256 bits of ZMM0-15
|
||||
XSave_Hi16_Zmm = 0x0080, // AVX512: all 512 bits of ZMM16-31
|
||||
XSave_PTState = 0x0100, // Processor Trace
|
||||
XSave_PKRUState = 0x0200, // Protection Key
|
||||
XSave_CetUState = 0x0800, // CET: user mode
|
||||
XSave_CetSState = 0x1000, // CET: supervisor mode
|
||||
XSave_HdcState = 0x2000, // Hardware Duty Cycle
|
||||
XSave_UintrState = 0x4000, // User Interrupts
|
||||
XSave_HwpState = 0x10000, // Hardware P-State
|
||||
XSave_Xtilecfg = 0x20000, // AMX: XTILECFG register
|
||||
XSave_Xtiledata = 0x40000, // AMX: data in the tiles
|
||||
XSave_AvxState = XSave_SseState | XSave_Ymm_Hi128,
|
||||
XSave_MPXState = XSave_Bndregs | XSave_Bndcsr,
|
||||
XSave_Avx512State = XSave_AvxState | XSave_OpMask | XSave_Zmm_Hi256 | XSave_Hi16_Zmm,
|
||||
XSave_CetState = XSave_CetUState | XSave_CetSState,
|
||||
XSave_AmxState = XSave_Xtilecfg | XSave_Xtiledata,
|
||||
};
|
||||
|
||||
// List of features requiring XSave_AvxState
|
||||
static const uint64_t XSaveReq_AvxState = 0
|
||||
| cpu_feature_fma
|
||||
| cpu_feature_avx
|
||||
| cpu_feature_f16c
|
||||
| cpu_feature_avx2
|
||||
| cpu_feature_avx512f
|
||||
| cpu_feature_avx512dq
|
||||
| cpu_feature_avx512ifma
|
||||
| cpu_feature_avx512cd
|
||||
| cpu_feature_avx512bw
|
||||
| cpu_feature_avx512vl
|
||||
| cpu_feature_avx512vbmi
|
||||
| cpu_feature_avx512vbmi2
|
||||
| cpu_feature_vaes
|
||||
| cpu_feature_avx512vnni
|
||||
| cpu_feature_avx512bitalg
|
||||
| cpu_feature_avx512vpopcntdq
|
||||
| cpu_feature_avx512fp16;
|
||||
|
||||
// List of features requiring XSave_Avx512State
|
||||
static const uint64_t XSaveReq_Avx512State = 0
|
||||
| cpu_feature_avx512f
|
||||
| cpu_feature_avx512dq
|
||||
| cpu_feature_avx512ifma
|
||||
| cpu_feature_avx512cd
|
||||
| cpu_feature_avx512bw
|
||||
| cpu_feature_avx512vl
|
||||
| cpu_feature_avx512vbmi
|
||||
| cpu_feature_avx512vbmi2
|
||||
| cpu_feature_avx512vnni
|
||||
| cpu_feature_avx512bitalg
|
||||
| cpu_feature_avx512vpopcntdq
|
||||
| cpu_feature_avx512fp16;
|
||||
|
||||
// List of features requiring XSave_CetState
|
||||
static const uint64_t XSaveReq_CetState = 0
|
||||
| cpu_feature_shstk;
|
||||
|
||||
struct XSaveRequirementMapping
|
||||
{
|
||||
uint64_t cpu_features;
|
||||
uint64_t xsave_state;
|
||||
};
|
||||
|
||||
static const struct XSaveRequirementMapping xsave_requirements[] = {
|
||||
{ XSaveReq_AvxState, XSave_AvxState },
|
||||
{ XSaveReq_Avx512State, XSave_Avx512State },
|
||||
{ XSaveReq_CetState, XSave_CetState },
|
||||
};
|
||||
|
||||
// List of AVX512 features (see detectProcessorFeatures())
|
||||
static const quint64 AllAVX512 = 0
|
||||
| CpuFeatureAVX512F
|
||||
| CpuFeatureAVX512DQ
|
||||
| CpuFeatureAVX512IFMA
|
||||
| CpuFeatureAVX512PF
|
||||
| CpuFeatureAVX512ER
|
||||
| CpuFeatureAVX512CD
|
||||
| CpuFeatureAVX512BW
|
||||
| CpuFeatureAVX512VL
|
||||
| CpuFeatureAVX512VBMI
|
||||
| CpuFeatureAVX512VBMI2
|
||||
| CpuFeatureAVX512VNNI
|
||||
| CpuFeatureAVX512BITALG
|
||||
| CpuFeatureAVX512VPOPCNTDQ
|
||||
| CpuFeatureAVX5124NNIW
|
||||
| CpuFeatureAVX5124FMAPS;
|
||||
|
@ -1,6 +1,6 @@
|
||||
/****************************************************************************
|
||||
**
|
||||
** Copyright (C) 2018 Intel Corporation.
|
||||
** Copyright (C) 2022 Intel Corporation.
|
||||
** Contact: https://www.qt.io/licensing/
|
||||
**
|
||||
** This file is part of the QtCore module of the Qt Toolkit.
|
||||
@ -38,224 +38,358 @@
|
||||
****************************************************************************/
|
||||
|
||||
// This is a generated file. DO NOT EDIT.
|
||||
// Please see util/x86simdgen/generate.pl
|
||||
#ifndef QSIMD_P_H
|
||||
# error "Please include <private/qsimd_p.h> instead"
|
||||
#endif
|
||||
// Please see 3rdparty/x86simd_generate.pl
|
||||
#ifndef QSIMD_X86_P_H
|
||||
#define QSIMD_X86_P_H
|
||||
|
||||
#include "qsimd_p.h"
|
||||
|
||||
//
|
||||
// W A R N I N G
|
||||
// -------------
|
||||
//
|
||||
// This file is not part of the Qt API. It exists purely as an
|
||||
// implementation detail. This header file may change from version to
|
||||
// version without notice, or even be removed.
|
||||
//
|
||||
// We mean it.
|
||||
//
|
||||
|
||||
QT_BEGIN_NAMESPACE
|
||||
|
||||
// used only to indicate that the CPU detection was initialized
|
||||
#define QSimdInitialized (Q_UINT64_C(1) << 0)
|
||||
#include <stdint.h>
|
||||
|
||||
// in CPUID Leaf 1, EDX:
|
||||
#define CpuFeatureSSE2 (Q_UINT64_C(1) << 1)
|
||||
#define cpu_feature_sse2 (UINT64_C(1) << 0)
|
||||
#define QT_FUNCTION_TARGET_STRING_SSE2 "sse2"
|
||||
|
||||
// in CPUID Leaf 1, ECX:
|
||||
#define CpuFeatureSSE3 (Q_UINT64_C(1) << 2)
|
||||
#define cpu_feature_sse3 (UINT64_C(1) << 1)
|
||||
#define QT_FUNCTION_TARGET_STRING_SSE3 "sse3"
|
||||
#define CpuFeatureSSSE3 (Q_UINT64_C(1) << 3)
|
||||
#define cpu_feature_ssse3 (UINT64_C(1) << 2)
|
||||
#define QT_FUNCTION_TARGET_STRING_SSSE3 "ssse3"
|
||||
#define CpuFeatureFMA (Q_UINT64_C(1) << 4)
|
||||
#define cpu_feature_fma (UINT64_C(1) << 3)
|
||||
#define QT_FUNCTION_TARGET_STRING_FMA "fma"
|
||||
#define CpuFeatureSSE4_1 (Q_UINT64_C(1) << 5)
|
||||
#define cpu_feature_sse4_1 (UINT64_C(1) << 4)
|
||||
#define QT_FUNCTION_TARGET_STRING_SSE4_1 "sse4.1"
|
||||
#define CpuFeatureSSE4_2 (Q_UINT64_C(1) << 6)
|
||||
#define cpu_feature_sse4_2 (UINT64_C(1) << 5)
|
||||
#define QT_FUNCTION_TARGET_STRING_SSE4_2 "sse4.2"
|
||||
#define CpuFeatureMOVBE (Q_UINT64_C(1) << 7)
|
||||
#define cpu_feature_movbe (UINT64_C(1) << 6)
|
||||
#define QT_FUNCTION_TARGET_STRING_MOVBE "movbe"
|
||||
#define CpuFeaturePOPCNT (Q_UINT64_C(1) << 8)
|
||||
#define cpu_feature_popcnt (UINT64_C(1) << 7)
|
||||
#define QT_FUNCTION_TARGET_STRING_POPCNT "popcnt"
|
||||
#define CpuFeatureAES (Q_UINT64_C(1) << 9)
|
||||
#define cpu_feature_aes (UINT64_C(1) << 8)
|
||||
#define QT_FUNCTION_TARGET_STRING_AES "aes,sse4.2"
|
||||
#define CpuFeatureAVX (Q_UINT64_C(1) << 10)
|
||||
#define cpu_feature_avx (UINT64_C(1) << 9)
|
||||
#define QT_FUNCTION_TARGET_STRING_AVX "avx"
|
||||
#define CpuFeatureF16C (Q_UINT64_C(1) << 11)
|
||||
#define QT_FUNCTION_TARGET_STRING_F16C "f16c"
|
||||
#define CpuFeatureRDRND (Q_UINT64_C(1) << 12)
|
||||
#define cpu_feature_f16c (UINT64_C(1) << 10)
|
||||
#define QT_FUNCTION_TARGET_STRING_F16C "f16c,avx"
|
||||
#define cpu_feature_rdrnd (UINT64_C(1) << 11)
|
||||
#define QT_FUNCTION_TARGET_STRING_RDRND "rdrnd"
|
||||
|
||||
// in CPUID Leaf 7, Sub-leaf 0, EBX:
|
||||
#define CpuFeatureBMI (Q_UINT64_C(1) << 13)
|
||||
#define cpu_feature_bmi (UINT64_C(1) << 12)
|
||||
#define QT_FUNCTION_TARGET_STRING_BMI "bmi"
|
||||
#define CpuFeatureHLE (Q_UINT64_C(1) << 14)
|
||||
#define QT_FUNCTION_TARGET_STRING_HLE "hle"
|
||||
#define CpuFeatureAVX2 (Q_UINT64_C(1) << 15)
|
||||
#define QT_FUNCTION_TARGET_STRING_AVX2 "avx2"
|
||||
#define CpuFeatureBMI2 (Q_UINT64_C(1) << 16)
|
||||
#define cpu_feature_avx2 (UINT64_C(1) << 13)
|
||||
#define QT_FUNCTION_TARGET_STRING_AVX2 "avx2,avx"
|
||||
#define cpu_feature_bmi2 (UINT64_C(1) << 14)
|
||||
#define QT_FUNCTION_TARGET_STRING_BMI2 "bmi2"
|
||||
#define CpuFeatureRTM (Q_UINT64_C(1) << 17)
|
||||
#define QT_FUNCTION_TARGET_STRING_RTM "rtm"
|
||||
#define CpuFeatureAVX512F (Q_UINT64_C(1) << 18)
|
||||
#define QT_FUNCTION_TARGET_STRING_AVX512F "avx512f"
|
||||
#define CpuFeatureAVX512DQ (Q_UINT64_C(1) << 19)
|
||||
#define QT_FUNCTION_TARGET_STRING_AVX512DQ "avx512dq"
|
||||
#define CpuFeatureRDSEED (Q_UINT64_C(1) << 20)
|
||||
#define cpu_feature_avx512f (UINT64_C(1) << 15)
|
||||
#define QT_FUNCTION_TARGET_STRING_AVX512F "avx512f,avx"
|
||||
#define cpu_feature_avx512dq (UINT64_C(1) << 16)
|
||||
#define QT_FUNCTION_TARGET_STRING_AVX512DQ "avx512dq,avx512f"
|
||||
#define cpu_feature_rdseed (UINT64_C(1) << 17)
|
||||
#define QT_FUNCTION_TARGET_STRING_RDSEED "rdseed"
|
||||
#define CpuFeatureAVX512IFMA (Q_UINT64_C(1) << 21)
|
||||
#define QT_FUNCTION_TARGET_STRING_AVX512IFMA "avx512ifma"
|
||||
#define CpuFeatureAVX512PF (Q_UINT64_C(1) << 22)
|
||||
#define QT_FUNCTION_TARGET_STRING_AVX512PF "avx512pf"
|
||||
#define CpuFeatureAVX512ER (Q_UINT64_C(1) << 23)
|
||||
#define QT_FUNCTION_TARGET_STRING_AVX512ER "avx512er"
|
||||
#define CpuFeatureAVX512CD (Q_UINT64_C(1) << 24)
|
||||
#define QT_FUNCTION_TARGET_STRING_AVX512CD "avx512cd"
|
||||
#define CpuFeatureSHA (Q_UINT64_C(1) << 25)
|
||||
#define cpu_feature_avx512ifma (UINT64_C(1) << 18)
|
||||
#define QT_FUNCTION_TARGET_STRING_AVX512IFMA "avx512ifma,avx512f"
|
||||
#define cpu_feature_avx512cd (UINT64_C(1) << 19)
|
||||
#define QT_FUNCTION_TARGET_STRING_AVX512CD "avx512cd,avx512f"
|
||||
#define cpu_feature_sha (UINT64_C(1) << 20)
|
||||
#define QT_FUNCTION_TARGET_STRING_SHA "sha"
|
||||
#define CpuFeatureAVX512BW (Q_UINT64_C(1) << 26)
|
||||
#define QT_FUNCTION_TARGET_STRING_AVX512BW "avx512bw"
|
||||
#define CpuFeatureAVX512VL (Q_UINT64_C(1) << 27)
|
||||
#define QT_FUNCTION_TARGET_STRING_AVX512VL "avx512vl"
|
||||
#define cpu_feature_avx512bw (UINT64_C(1) << 21)
|
||||
#define QT_FUNCTION_TARGET_STRING_AVX512BW "avx512bw,avx512f"
|
||||
#define cpu_feature_avx512vl (UINT64_C(1) << 22)
|
||||
#define QT_FUNCTION_TARGET_STRING_AVX512VL "avx512vl,avx512f"
|
||||
|
||||
// in CPUID Leaf 7, Sub-leaf 0, ECX:
|
||||
#define CpuFeatureAVX512VBMI (Q_UINT64_C(1) << 28)
|
||||
#define QT_FUNCTION_TARGET_STRING_AVX512VBMI "avx512vbmi"
|
||||
#define CpuFeatureAVX512VBMI2 (Q_UINT64_C(1) << 29)
|
||||
#define QT_FUNCTION_TARGET_STRING_AVX512VBMI2 "avx512vbmi2"
|
||||
#define CpuFeatureGFNI (Q_UINT64_C(1) << 30)
|
||||
#define cpu_feature_avx512vbmi (UINT64_C(1) << 23)
|
||||
#define QT_FUNCTION_TARGET_STRING_AVX512VBMI "avx512vbmi,avx512f"
|
||||
#define cpu_feature_avx512vbmi2 (UINT64_C(1) << 24)
|
||||
#define QT_FUNCTION_TARGET_STRING_AVX512VBMI2 "avx512vbmi2,avx512f"
|
||||
#define cpu_feature_shstk (UINT64_C(1) << 25)
|
||||
#define QT_FUNCTION_TARGET_STRING_SHSTK "shstk"
|
||||
#define cpu_feature_gfni (UINT64_C(1) << 26)
|
||||
#define QT_FUNCTION_TARGET_STRING_GFNI "gfni"
|
||||
#define CpuFeatureVAES (Q_UINT64_C(1) << 31)
|
||||
#define QT_FUNCTION_TARGET_STRING_VAES "vaes"
|
||||
#define CpuFeatureAVX512VNNI (Q_UINT64_C(1) << 32)
|
||||
#define QT_FUNCTION_TARGET_STRING_AVX512VNNI "avx512vnni"
|
||||
#define CpuFeatureAVX512BITALG (Q_UINT64_C(1) << 33)
|
||||
#define QT_FUNCTION_TARGET_STRING_AVX512BITALG "avx512bitalg"
|
||||
#define CpuFeatureAVX512VPOPCNTDQ (Q_UINT64_C(1) << 34)
|
||||
#define QT_FUNCTION_TARGET_STRING_AVX512VPOPCNTDQ "avx512vpopcntdq"
|
||||
#define cpu_feature_vaes (UINT64_C(1) << 27)
|
||||
#define QT_FUNCTION_TARGET_STRING_VAES "vaes,avx2,avx,aes"
|
||||
#define cpu_feature_avx512vnni (UINT64_C(1) << 28)
|
||||
#define QT_FUNCTION_TARGET_STRING_AVX512VNNI "avx512vnni,avx512f"
|
||||
#define cpu_feature_avx512bitalg (UINT64_C(1) << 29)
|
||||
#define QT_FUNCTION_TARGET_STRING_AVX512BITALG "avx512bitalg,avx512f"
|
||||
#define cpu_feature_avx512vpopcntdq (UINT64_C(1) << 30)
|
||||
#define QT_FUNCTION_TARGET_STRING_AVX512VPOPCNTDQ "avx512vpopcntdq,avx512f"
|
||||
|
||||
// in CPUID Leaf 7, Sub-leaf 0, EDX:
|
||||
#define CpuFeatureAVX5124NNIW (Q_UINT64_C(1) << 35)
|
||||
#define QT_FUNCTION_TARGET_STRING_AVX5124NNIW "avx5124nniw"
|
||||
#define CpuFeatureAVX5124FMAPS (Q_UINT64_C(1) << 36)
|
||||
#define QT_FUNCTION_TARGET_STRING_AVX5124FMAPS "avx5124fmaps"
|
||||
#define cpu_feature_hybrid (UINT64_C(1) << 31)
|
||||
#define QT_FUNCTION_TARGET_STRING_HYBRID "hybrid"
|
||||
#define cpu_feature_ibt (UINT64_C(1) << 32)
|
||||
#define QT_FUNCTION_TARGET_STRING_IBT "ibt"
|
||||
#define cpu_feature_avx512fp16 (UINT64_C(1) << 33)
|
||||
#define QT_FUNCTION_TARGET_STRING_AVX512FP16 "avx512fp16,avx512f,f16c"
|
||||
|
||||
static const quint64 qCompilerCpuFeatures = 0
|
||||
// CPU architectures
|
||||
#define cpu_x86_64 (0 \
|
||||
| cpu_feature_sse2)
|
||||
#define cpu_core2 (cpu_x86_64 \
|
||||
| cpu_feature_sse3 \
|
||||
| cpu_feature_ssse3)
|
||||
#define cpu_nhm (cpu_core2 \
|
||||
| cpu_feature_sse4_1 \
|
||||
| cpu_feature_sse4_2 \
|
||||
| cpu_feature_popcnt)
|
||||
#define cpu_wsm (cpu_nhm)
|
||||
#define cpu_snb (cpu_wsm \
|
||||
| cpu_feature_avx)
|
||||
#define cpu_ivb (cpu_snb \
|
||||
| cpu_feature_f16c \
|
||||
| cpu_feature_rdrnd)
|
||||
#define cpu_hsw (cpu_ivb \
|
||||
| cpu_feature_avx2 \
|
||||
| cpu_feature_fma \
|
||||
| cpu_feature_bmi \
|
||||
| cpu_feature_bmi2 \
|
||||
| cpu_feature_movbe)
|
||||
#define cpu_bdw (cpu_hsw \
|
||||
| cpu_feature_rdseed)
|
||||
#define cpu_bdx (cpu_bdw)
|
||||
#define cpu_skl (cpu_bdw)
|
||||
#define cpu_adl (cpu_skl \
|
||||
| cpu_feature_gfni \
|
||||
| cpu_feature_vaes \
|
||||
| cpu_feature_shstk \
|
||||
| cpu_feature_ibt)
|
||||
#define cpu_skx (cpu_skl \
|
||||
| cpu_feature_avx512f \
|
||||
| cpu_feature_avx512dq \
|
||||
| cpu_feature_avx512cd \
|
||||
| cpu_feature_avx512bw \
|
||||
| cpu_feature_avx512vl)
|
||||
#define cpu_clx (cpu_skx \
|
||||
| cpu_feature_avx512vnni)
|
||||
#define cpu_cpx (cpu_clx)
|
||||
#define cpu_cnl (cpu_skx \
|
||||
| cpu_feature_avx512ifma \
|
||||
| cpu_feature_avx512vbmi)
|
||||
#define cpu_icl (cpu_cnl \
|
||||
| cpu_feature_avx512vbmi2 \
|
||||
| cpu_feature_gfni \
|
||||
| cpu_feature_vaes \
|
||||
| cpu_feature_avx512vnni \
|
||||
| cpu_feature_avx512bitalg \
|
||||
| cpu_feature_avx512vpopcntdq)
|
||||
#define cpu_icx (cpu_icl)
|
||||
#define cpu_tgl (cpu_icl \
|
||||
| cpu_feature_shstk \
|
||||
| cpu_feature_ibt)
|
||||
#define cpu_spr (cpu_tgl)
|
||||
#define cpu_slm (cpu_wsm \
|
||||
| cpu_feature_rdrnd \
|
||||
| cpu_feature_movbe)
|
||||
#define cpu_glm (cpu_slm \
|
||||
| cpu_feature_rdseed)
|
||||
#define cpu_tnt (cpu_glm \
|
||||
| cpu_feature_gfni)
|
||||
#define cpu_nehalem (cpu_nhm)
|
||||
#define cpu_westmere (cpu_wsm)
|
||||
#define cpu_sandybridge (cpu_snb)
|
||||
#define cpu_ivybridge (cpu_ivb)
|
||||
#define cpu_haswell (cpu_hsw)
|
||||
#define cpu_broadwell (cpu_bdw)
|
||||
#define cpu_skylake (cpu_skl)
|
||||
#define cpu_skylake_avx512 (cpu_skx)
|
||||
#define cpu_cascadelake (cpu_clx)
|
||||
#define cpu_cooperlake (cpu_cpx)
|
||||
#define cpu_cannonlake (cpu_cnl)
|
||||
#define cpu_icelake_client (cpu_icl)
|
||||
#define cpu_icelake_server (cpu_icx)
|
||||
#define cpu_alderlake (cpu_adl)
|
||||
#define cpu_sapphirerapids (cpu_spr)
|
||||
#define cpu_tigerlake (cpu_tgl)
|
||||
#define cpu_silvermont (cpu_slm)
|
||||
#define cpu_goldmont (cpu_glm)
|
||||
#define cpu_tremont (cpu_tnt)
|
||||
|
||||
static const uint64_t _compilerCpuFeatures = 0
|
||||
#ifdef __SSE2__
|
||||
| CpuFeatureSSE2
|
||||
| cpu_feature_sse2
|
||||
#endif
|
||||
#ifdef __SSE3__
|
||||
| CpuFeatureSSE3
|
||||
| cpu_feature_sse3
|
||||
#endif
|
||||
#ifdef __SSSE3__
|
||||
| CpuFeatureSSSE3
|
||||
| cpu_feature_ssse3
|
||||
#endif
|
||||
#ifdef __FMA__
|
||||
| CpuFeatureFMA
|
||||
| cpu_feature_fma
|
||||
#endif
|
||||
#ifdef __SSE4_1__
|
||||
| CpuFeatureSSE4_1
|
||||
| cpu_feature_sse4_1
|
||||
#endif
|
||||
#ifdef __SSE4_2__
|
||||
| CpuFeatureSSE4_2
|
||||
| cpu_feature_sse4_2
|
||||
#endif
|
||||
#ifdef __MOVBE__
|
||||
| CpuFeatureMOVBE
|
||||
| cpu_feature_movbe
|
||||
#endif
|
||||
#ifdef __POPCNT__
|
||||
| CpuFeaturePOPCNT
|
||||
| cpu_feature_popcnt
|
||||
#endif
|
||||
#ifdef __AES__
|
||||
| CpuFeatureAES
|
||||
| cpu_feature_aes
|
||||
#endif
|
||||
#ifdef __AVX__
|
||||
| CpuFeatureAVX
|
||||
| cpu_feature_avx
|
||||
#endif
|
||||
#ifdef __F16C__
|
||||
| CpuFeatureF16C
|
||||
| cpu_feature_f16c
|
||||
#endif
|
||||
#ifdef __RDRND__
|
||||
| CpuFeatureRDRND
|
||||
| cpu_feature_rdrnd
|
||||
#endif
|
||||
#ifdef __BMI__
|
||||
| CpuFeatureBMI
|
||||
#endif
|
||||
#ifdef __HLE__
|
||||
| CpuFeatureHLE
|
||||
| cpu_feature_bmi
|
||||
#endif
|
||||
#ifdef __AVX2__
|
||||
| CpuFeatureAVX2
|
||||
| cpu_feature_avx2
|
||||
#endif
|
||||
#ifdef __BMI2__
|
||||
| CpuFeatureBMI2
|
||||
#endif
|
||||
#ifdef __RTM__
|
||||
| CpuFeatureRTM
|
||||
| cpu_feature_bmi2
|
||||
#endif
|
||||
#ifdef __AVX512F__
|
||||
| CpuFeatureAVX512F
|
||||
| cpu_feature_avx512f
|
||||
#endif
|
||||
#ifdef __AVX512DQ__
|
||||
| CpuFeatureAVX512DQ
|
||||
| cpu_feature_avx512dq
|
||||
#endif
|
||||
#ifdef __RDSEED__
|
||||
| CpuFeatureRDSEED
|
||||
| cpu_feature_rdseed
|
||||
#endif
|
||||
#ifdef __AVX512IFMA__
|
||||
| CpuFeatureAVX512IFMA
|
||||
#endif
|
||||
#ifdef __AVX512PF__
|
||||
| CpuFeatureAVX512PF
|
||||
#endif
|
||||
#ifdef __AVX512ER__
|
||||
| CpuFeatureAVX512ER
|
||||
| cpu_feature_avx512ifma
|
||||
#endif
|
||||
#ifdef __AVX512CD__
|
||||
| CpuFeatureAVX512CD
|
||||
| cpu_feature_avx512cd
|
||||
#endif
|
||||
#ifdef __SHA__
|
||||
| CpuFeatureSHA
|
||||
| cpu_feature_sha
|
||||
#endif
|
||||
#ifdef __AVX512BW__
|
||||
| CpuFeatureAVX512BW
|
||||
| cpu_feature_avx512bw
|
||||
#endif
|
||||
#ifdef __AVX512VL__
|
||||
| CpuFeatureAVX512VL
|
||||
| cpu_feature_avx512vl
|
||||
#endif
|
||||
#ifdef __AVX512VBMI__
|
||||
| CpuFeatureAVX512VBMI
|
||||
| cpu_feature_avx512vbmi
|
||||
#endif
|
||||
#ifdef __AVX512VBMI2__
|
||||
| CpuFeatureAVX512VBMI2
|
||||
| cpu_feature_avx512vbmi2
|
||||
#endif
|
||||
#ifdef __SHSTK__
|
||||
| cpu_feature_shstk
|
||||
#endif
|
||||
#ifdef __GFNI__
|
||||
| CpuFeatureGFNI
|
||||
| cpu_feature_gfni
|
||||
#endif
|
||||
#ifdef __VAES__
|
||||
| CpuFeatureVAES
|
||||
| cpu_feature_vaes
|
||||
#endif
|
||||
#ifdef __AVX512VNNI__
|
||||
| CpuFeatureAVX512VNNI
|
||||
| cpu_feature_avx512vnni
|
||||
#endif
|
||||
#ifdef __AVX512BITALG__
|
||||
| CpuFeatureAVX512BITALG
|
||||
| cpu_feature_avx512bitalg
|
||||
#endif
|
||||
#ifdef __AVX512VPOPCNTDQ__
|
||||
| CpuFeatureAVX512VPOPCNTDQ
|
||||
| cpu_feature_avx512vpopcntdq
|
||||
#endif
|
||||
#ifdef __AVX5124NNIW__
|
||||
| CpuFeatureAVX5124NNIW
|
||||
#ifdef __HYBRID__
|
||||
| cpu_feature_hybrid
|
||||
#endif
|
||||
#ifdef __AVX5124FMAPS__
|
||||
| CpuFeatureAVX5124FMAPS
|
||||
#ifdef __IBT__
|
||||
| cpu_feature_ibt
|
||||
#endif
|
||||
#ifdef __AVX512FP16__
|
||||
| cpu_feature_avx512fp16
|
||||
#endif
|
||||
;
|
||||
|
||||
QT_END_NAMESPACE
|
||||
#if (defined __cplusplus) && __cplusplus >= 201103L
|
||||
enum X86CpuFeatures : uint64_t {
|
||||
CpuFeatureSSE2 = cpu_feature_sse2, ///< Streaming SIMD Extensions 2
|
||||
CpuFeatureSSE3 = cpu_feature_sse3, ///< Streaming SIMD Extensions 3
|
||||
CpuFeatureSSSE3 = cpu_feature_ssse3, ///< Supplemental Streaming SIMD Extensions 3
|
||||
CpuFeatureFMA = cpu_feature_fma, ///< Fused Multiply-Add
|
||||
CpuFeatureSSE4_1 = cpu_feature_sse4_1, ///< Streaming SIMD Extensions 4.1
|
||||
CpuFeatureSSE4_2 = cpu_feature_sse4_2, ///< Streaming SIMD Extensions 4.2
|
||||
CpuFeatureMOVBE = cpu_feature_movbe, ///< MOV Big Endian
|
||||
CpuFeaturePOPCNT = cpu_feature_popcnt, ///< Population count
|
||||
CpuFeatureAES = cpu_feature_aes, ///< Advanced Encryption Standard
|
||||
CpuFeatureAVX = cpu_feature_avx, ///< Advanced Vector Extensions
|
||||
CpuFeatureF16C = cpu_feature_f16c, ///< 16-bit Float Conversion
|
||||
CpuFeatureRDRND = cpu_feature_rdrnd, ///< Random number generator
|
||||
CpuFeatureBMI = cpu_feature_bmi, ///< Bit Manipulation Instructions
|
||||
CpuFeatureAVX2 = cpu_feature_avx2, ///< Advanced Vector Extensions 2
|
||||
CpuFeatureBMI2 = cpu_feature_bmi2, ///< Bit Manipulation Instructions 2
|
||||
CpuFeatureAVX512F = cpu_feature_avx512f, ///< AVX512 Foundation
|
||||
CpuFeatureAVX512DQ = cpu_feature_avx512dq, ///< AVX512 Double & Quadword
|
||||
CpuFeatureRDSEED = cpu_feature_rdseed, ///< Random number generator for seeding
|
||||
CpuFeatureAVX512IFMA = cpu_feature_avx512ifma, ///< AVX512 Integer Fused Multiply-Add
|
||||
CpuFeatureAVX512CD = cpu_feature_avx512cd, ///< AVX512 Conflict Detection
|
||||
CpuFeatureSHA = cpu_feature_sha, ///< SHA-1 and SHA-256 instructions
|
||||
CpuFeatureAVX512BW = cpu_feature_avx512bw, ///< AVX512 Byte & Word
|
||||
CpuFeatureAVX512VL = cpu_feature_avx512vl, ///< AVX512 Vector Length
|
||||
CpuFeatureAVX512VBMI = cpu_feature_avx512vbmi, ///< AVX512 Vector Byte Manipulation Instructions
|
||||
CpuFeatureAVX512VBMI2 = cpu_feature_avx512vbmi2, ///< AVX512 Vector Byte Manipulation Instructions 2
|
||||
CpuFeatureSHSTK = cpu_feature_shstk, ///< Control Flow Enforcement Technology Shadow Stack
|
||||
CpuFeatureGFNI = cpu_feature_gfni, ///< Galois Field new instructions
|
||||
CpuFeatureVAES = cpu_feature_vaes, ///< 256- and 512-bit AES
|
||||
CpuFeatureAVX512VNNI = cpu_feature_avx512vnni, ///< AVX512 Vector Neural Network Instructions
|
||||
CpuFeatureAVX512BITALG = cpu_feature_avx512bitalg, ///< AVX512 Bit Algorithms
|
||||
CpuFeatureAVX512VPOPCNTDQ = cpu_feature_avx512vpopcntdq, ///< AVX512 Population Count
|
||||
CpuFeatureHYBRID = cpu_feature_hybrid, ///< Hybrid processor
|
||||
CpuFeatureIBT = cpu_feature_ibt, ///< Control Flow Enforcement Technology Indirect Branch Tracking
|
||||
CpuFeatureAVX512FP16 = cpu_feature_avx512fp16, ///< AVX512 16-bit Floating Point
|
||||
}; // enum X86CpuFeatures
|
||||
|
||||
#endif // QSIMD_X86_P_H
|
||||
enum X86CpuArchitectures : uint64_t {
|
||||
CpuArchx8664 = cpu_x86_64,
|
||||
CpuArchCore2 = cpu_core2,
|
||||
CpuArchNHM = cpu_nhm,
|
||||
CpuArchWSM = cpu_wsm,
|
||||
CpuArchSNB = cpu_snb,
|
||||
CpuArchIVB = cpu_ivb,
|
||||
CpuArchHSW = cpu_hsw,
|
||||
CpuArchBDW = cpu_bdw,
|
||||
CpuArchBDX = cpu_bdx,
|
||||
CpuArchSKL = cpu_skl,
|
||||
CpuArchADL = cpu_adl,
|
||||
CpuArchSKX = cpu_skx,
|
||||
CpuArchCLX = cpu_clx,
|
||||
CpuArchCPX = cpu_cpx,
|
||||
CpuArchCNL = cpu_cnl,
|
||||
CpuArchICL = cpu_icl,
|
||||
CpuArchICX = cpu_icx,
|
||||
CpuArchTGL = cpu_tgl,
|
||||
CpuArchSPR = cpu_spr,
|
||||
CpuArchSLM = cpu_slm,
|
||||
CpuArchGLM = cpu_glm,
|
||||
CpuArchTNT = cpu_tnt,
|
||||
CpuArchNehalem = cpu_nehalem, ///< Intel Core i3/i5/i7
|
||||
CpuArchWestmere = cpu_westmere, ///< Intel Core i3/i5/i7
|
||||
CpuArchSandyBridge = cpu_sandybridge, ///< Second Generation Intel Core i3/i5/i7
|
||||
CpuArchIvyBridge = cpu_ivybridge, ///< Third Generation Intel Core i3/i5/i7
|
||||
CpuArchHaswell = cpu_haswell, ///< Fourth Generation Intel Core i3/i5/i7
|
||||
CpuArchBroadwell = cpu_broadwell, ///< Fifth Generation Intel Core i3/i5/i7
|
||||
CpuArchSkylake = cpu_skylake, ///< Sixth Generation Intel Core i3/i5/i7
|
||||
CpuArchSkylakeAvx512 = cpu_skylake_avx512, ///< Intel Xeon Scalable
|
||||
CpuArchCascadeLake = cpu_cascadelake, ///< Second Generation Intel Xeon Scalable
|
||||
CpuArchCooperLake = cpu_cooperlake, ///< Third Generation Intel Xeon Scalable
|
||||
CpuArchCannonLake = cpu_cannonlake, ///< Intel Core i3-8121U
|
||||
CpuArchIceLakeClient = cpu_icelake_client, ///< Tenth Generation Intel Core i3/i5/i7
|
||||
CpuArchIceLakeServer = cpu_icelake_server, ///< Third Generation Intel Xeon Scalable
|
||||
CpuArchAlderLake = cpu_alderlake,
|
||||
CpuArchSapphireRapids = cpu_sapphirerapids,
|
||||
CpuArchTigerLake = cpu_tigerlake, ///< Eleventh Generation Intel Core i3/i5/i7
|
||||
CpuArchSilvermont = cpu_silvermont,
|
||||
CpuArchGoldmont = cpu_goldmont,
|
||||
CpuArchTremont = cpu_tremont,
|
||||
}; // enum X86cpuArchitectures
|
||||
#endif /* C++11 */
|
||||
|
||||
#endif /* QSIMD_X86_P_H */
|
||||
|
1
util/x86simdgen/.gitignore
vendored
Normal file
1
util/x86simdgen/.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
||||
qsimd_x86_p.h
|
35
util/x86simdgen/3rdparty/simd-amd.conf
vendored
Normal file
35
util/x86simdgen/3rdparty/simd-amd.conf
vendored
Normal file
@ -0,0 +1,35 @@
|
||||
# -*- mode: conf; indent-tabs-mode: t -*-
|
||||
# Feature CPUID function Bit Required feature
|
||||
#mmxext Leaf80000001hEDX 22 # AMD extensions to MMX
|
||||
#rdtscp Leaf80000001hEDX 27 # RDTSCP instruction
|
||||
#3dnow Leaf80000001hEDX 31 # 3DNow! instructions
|
||||
#3dnowext Leaf80000001hEDX 30 # AMD extensions to 3DNow!
|
||||
lzcnt Leaf80000001hECX 5 # Leading Zero Count
|
||||
sse4a Leaf80000001hECX 6 # SSE4a
|
||||
xop Leaf80000001hECX 11 # eXtended Operations
|
||||
fma4 Leaf80000001hECX 16 # 4-operand Fused Multiply-Add
|
||||
tbm Leaf80000001hECX 21 # Trailing Bit Manipulation
|
||||
clzero Leaf80000008hEBX 0 # Cacheline clear and write zero
|
||||
wbnoinvd Leaf80000008hEBX 9 # Write Back with No Invalidate
|
||||
|
||||
# Processor/arch listing below this line
|
||||
# Source: GCC gcc/common/config/i386/i386-common.c
|
||||
# Source: Wikipedia
|
||||
# Architecture Based on New features
|
||||
arch=AmdFam10h x86_64 sse3,sse4a,cx16,popcnt,lzcnt # AMD K10
|
||||
arch=BtVer1 AmdFam10h xsave # AMD Bobcat v1
|
||||
arch=BtVer2 BtVer1 ssse3,sse4.1,sse4.2,avx,bmi,f16c,movbe,xsaveopt # AMD Bobcat v2
|
||||
arch=BdVer1 BtVer1 ssse3,sse4.1,sse4.2,avx,xop,fma4
|
||||
arch=BdVer2 BdVer1 bmi,f16c,fma,tbm
|
||||
arch=BdVer3 BdVer2 fsgsbase,xsaveopt
|
||||
arch=BdVer4 BdVer3 avx2,bmi2,rdrnd,movbe
|
||||
arch=ZnVer1 BdVer4 adx,rdseed,clzero,clflushopt,xsavec,xsaves
|
||||
arch=ZnVer2 ZnVer1 clwb,wbnoinvd
|
||||
|
||||
arch=Barcelona AmdFam10h
|
||||
arch=Bulldozer BdVer1 # AMD Bulldozer
|
||||
arch=Piledriver BdVer2 # AMD Bulldozer v2 (Piledriver)
|
||||
arch=Steamroller BdVer3 # AMD Bulldozer v3 (Steamroller)
|
||||
arch=Excavator BdVer4 # AMD Bulldozer v4 (Excavator)
|
||||
arch=Zen ZnVer1 # AMD Zen
|
||||
arch=Zen2 ZnVer2 # AMD Zen2
|
179
util/x86simdgen/3rdparty/simd-intel.conf
vendored
Normal file
179
util/x86simdgen/3rdparty/simd-intel.conf
vendored
Normal file
@ -0,0 +1,179 @@
|
||||
# -*- mode: conf; indent-tabs-mode: t -*-
|
||||
# Feature CPUID function Bit Required feature
|
||||
#sep Leaf01EDX 11 # Sysenter/sysexit
|
||||
#cmov Leaf01EDX 15 # Conditional Move
|
||||
#clflush Leaf01EDX 19 # Cache-Line Flush
|
||||
#mmx Leaf01EDX 23 # Multi Media Extensions
|
||||
#fxsr Leaf01EDX 24 # FXSAVE instruction
|
||||
#sse Leaf01EDX 25 # Streaming SIMD Extensions
|
||||
sse2 Leaf01EDX 26 # Streaming SIMD Extensions 2
|
||||
# -- everything above this line is mandatory on x86-64 --
|
||||
sse3 Leaf01ECX 0 # Streaming SIMD Extensions 3
|
||||
#pclmul Leaf01ECX 1 # Carryless Multiply
|
||||
ssse3 Leaf01ECX 9 # Supplemental Streaming SIMD Extensions 3
|
||||
fma Leaf01ECX 12 # Fused Multiply-Add
|
||||
#cx16 Leaf01ECX 13 # Compare-Exchange 16 bytes
|
||||
sse4.1 Leaf01ECX 19 # Streaming SIMD Extensions 4.1
|
||||
sse4.2 Leaf01ECX 20 # Streaming SIMD Extensions 4.2
|
||||
movbe Leaf01ECX 22 # MOV Big Endian
|
||||
popcnt Leaf01ECX 23 # Population count
|
||||
aes Leaf01ECX 25 sse4.2 # Advanced Encryption Standard
|
||||
#xsave Leaf01ECX 26 # XSAVE, XGETBV instructions
|
||||
#osxsave Leaf01ECX 27 # XSAVE enabled by OS
|
||||
avx Leaf01ECX 28 # Advanced Vector Extensions
|
||||
f16c Leaf01ECX 29 avx # 16-bit Float Conversion
|
||||
rdrnd Leaf01ECX 30 # Random number generator
|
||||
#hypervisor Leaf01ECX 31 # Running on a hypervisor
|
||||
#fsgsbase Leaf07_00EBX 0 # FS/GS base access
|
||||
bmi Leaf07_00EBX 3 # Bit Manipulation Instructions
|
||||
#hle Leaf07_00EBX 4 # Hardware Lock Elision
|
||||
avx2 Leaf07_00EBX 5 avx # Advanced Vector Extensions 2
|
||||
bmi2 Leaf07_00EBX 8 # Bit Manipulation Instructions 2
|
||||
#erms Leaf07_00EBX 9 # Enhanced REP MOVSB/STOSB
|
||||
#rtm Leaf07_00EBX 11 # Restricted Transactional Memory
|
||||
#rdt_m Leaf07_00EBX 12 # Resource Director Technology (RDT) Monitoring
|
||||
#mpx Leaf07_00EBX 14 # Memory Protection Extensions
|
||||
#rdt_a Leaf07_00EBX 15 # Resource Director Technology (RDT) Allocation
|
||||
avx512f Leaf07_00EBX 16 avx # AVX512 Foundation
|
||||
avx512dq Leaf07_00EBX 17 avx512f # AVX512 Double & Quadword
|
||||
rdseed Leaf07_00EBX 18 # Random number generator for seeding
|
||||
#adx Leaf07_00EBX 19 # Multi-Precision Add-Carry
|
||||
avx512ifma Leaf07_00EBX 21 avx512f # AVX512 Integer Fused Multiply-Add
|
||||
#clflushopt Leaf07_00EBX 23 # Cache-Line Flush Optimized
|
||||
#clwb Leaf07_00EBX 24 # Cache-Line Write Back
|
||||
#avx512pf Leaf07_00EBX 26 avx512f # AVX512 Prefetch
|
||||
#avx512er Leaf07_00EBX 27 avx512f # AVX512 Exponential & Reciprocal
|
||||
avx512cd Leaf07_00EBX 28 avx512f # AVX512 Conflict Detection
|
||||
sha Leaf07_00EBX 29 # SHA-1 and SHA-256 instructions
|
||||
avx512bw Leaf07_00EBX 30 avx512f # AVX512 Byte & Word
|
||||
avx512vl Leaf07_00EBX 31 avx512f # AVX512 Vector Length
|
||||
avx512vbmi Leaf07_00ECX 1 avx512f # AVX512 Vector Byte Manipulation Instructions
|
||||
#pku Leaf07_00ECX 3 # Protection Keys for User mode
|
||||
#ospke Leaf07_00ECX 4 # Protection Keys Enabled by OS
|
||||
#waitpkg Leaf07_00ECX 5 # User-Level Monitor / Wait
|
||||
avx512vbmi2 Leaf07_00ECX 6 avx512f # AVX512 Vector Byte Manipulation Instructions 2
|
||||
shstk Leaf07_00ECX 7 # Control Flow Enforcement Technology Shadow Stack
|
||||
gfni Leaf07_00ECX 8 # Galois Field new instructions
|
||||
vaes Leaf07_00ECX 9 avx2,avx,aes # 256- and 512-bit AES
|
||||
#vpclmulqdq Leaf07_00ECX 10 avx # 256- and 512-bit Carryless Multiply
|
||||
avx512vnni Leaf07_00ECX 11 avx512f # AVX512 Vector Neural Network Instructions
|
||||
avx512bitalg Leaf07_00ECX 12 avx512f # AVX512 Bit Algorithms
|
||||
avx512vpopcntdq Leaf07_00ECX 14 avx512f # AVX512 Population Count
|
||||
#la57 Leaf07_00ECX 16 # 5-level page tables
|
||||
#rdpid Leaf07_00ECX 22 # RDPID instruction
|
||||
#cldemote Leaf07_00ECX 25 # Cache Line Demotion
|
||||
#movdiri Leaf07_00ECX 27 # Move Direct-store Integer
|
||||
#movdir64b Leaf07_00ECX 28 # Move Direct-store 64 bytes
|
||||
#enqcmd Leaf07_00ECX 29 # Enqueue Command
|
||||
#pks Leaf07_00ECX 31 # Protection Keys for Supervisor mode
|
||||
#avx5124nniw Leaf07_00EDX 2 avx512f # AVX512 4-iteration Vector Neural Network Instructions
|
||||
#avx5124fmaps Leaf07_00EDX 3 avx512f # AVX512 4-iteration Fused Multiply Accumulation
|
||||
#fsrm Leaf07_00EDX 4 # Fast Short REP MOV
|
||||
#uintr Leaf07_00EDX 5 # User interrupts
|
||||
#avx512vp2intersect Leaf07_00EDX 8 avx512f # AVX512 Intersection computation
|
||||
#serialize Leaf07_00EDX 14 # SERIALIZE instruction
|
||||
hybrid Leaf07_00EDX 15 # Hybrid processor
|
||||
#tsxldtrk Leaf07_00EDX 16 # TSX (RTM) Suspend Load Address Tracking
|
||||
#pconfig Leaf07_00EDX 18 # Platform configuration
|
||||
ibt Leaf07_00EDX 20 # Control Flow Enforcement Technology Indirect Branch Tracking
|
||||
#amxbf16 Leaf07_00EDX 22 amxtile # AMX Tile multiplication in BFloat16
|
||||
avx512fp16 Leaf07_00EDX 23 avx512f,f16c # AVX512 16-bit Floating Point
|
||||
#amxtile Leaf07_00EDX 24 # Advanced Matrix Extensions Tile support
|
||||
#amxint8 Leaf07_00EDX 25 amxtile # AMX Tile multiplication for Int8
|
||||
#avxvnni Leaf07_01EAX 4 avx # AVX (VEX-encoded) versions of the Vector Neural Network Instructions
|
||||
#avx512bf16 Leaf07_01EAX 5 avx512f # AVX512 Brain Float16
|
||||
#zlmovsb Leaf07_01EAX 10 # Zero-length MOVSB
|
||||
#fsrs Leaf07_01EAX 11 # Fast Short (REP?) STOSB
|
||||
#fsrc Leaf07_01EAX 12 # Fast Short (REP?) CMPSB, SCASB
|
||||
#fred Leaf07_01EAX 17 # Flexible Return and Event Delivery
|
||||
#lkgs Leaf07_01EAX 18 # Load into Kernel GS
|
||||
#lam Leaf07_01EAX 26 # Linear Address Masking
|
||||
#xsaveopt Leaf13_01EAX 0 # Optimized XSAVE
|
||||
#xsavec Leaf13_01EAX 1 # XSAVE with Compaction
|
||||
#xgetbv1 Leaf13_01EAX 2 # XGETBV with ECX=1
|
||||
#xsaves Leaf13_01EAX 3 # XSAVE Supervisor mode
|
||||
#xfd Leaf13_01EAX 4 # eXtended Feature Disable MSR
|
||||
#lzcnt Leaf80000001hECX 5 # Leading Zero Count
|
||||
|
||||
# XSAVE states
|
||||
# Source: Intel Software Development Manual, Volume 1, Chapter 13
|
||||
# Source: Intel Instruction Set Extensions Manual (ed. 041), Chapter 3, "Intel AMX Instruction Set"
|
||||
# Grouping Value Required for
|
||||
xsave=X87 0x0001 # X87 and MMX state
|
||||
xsave=SseState 0x0002 sse # SSE: 128 bits of XMM registers
|
||||
xsave=Ymm_Hi128 0x0004 # AVX: high 128 bits in YMM registers
|
||||
xsave=Bndregs 0x0008 # Memory Protection Extensions
|
||||
xsave=Bndcsr 0x0010 # Memory Protection Extensions
|
||||
xsave=OpMask 0x0020 # AVX512: k0 through k7
|
||||
xsave=Zmm_Hi256 0x0040 # AVX512: high 256 bits of ZMM0-15
|
||||
xsave=Hi16_Zmm 0x0080 # AVX512: all 512 bits of ZMM16-31
|
||||
xsave=PTState 0x0100 # Processor Trace
|
||||
xsave=PKRUState 0x0200 pku # Protection Key
|
||||
# ??? 0x0400
|
||||
xsave=CetUState 0x0800 # CET: user mode
|
||||
xsave=CetSState 0x1000 # CET: supervisor mode
|
||||
xsave=HdcState 0x2000 # Hardware Duty Cycle
|
||||
xsave=UintrState 0x4000 uintr # User Interrupts
|
||||
# ??? 0x8000
|
||||
xsave=HwpState 0x10000 # Hardware P-State
|
||||
xsave=Xtilecfg 0x20000 # AMX: XTILECFG register
|
||||
xsave=Xtiledata 0x40000 # AMX: data in the tiles
|
||||
xsave=AvxState SseState|Ymm_Hi128 avx,fma,avx512f
|
||||
xsave=MPXState Bndregs|Bndcsr mpx
|
||||
xsave=Avx512State AvxState|OpMask|Zmm_Hi256|Hi16_Zmm avx512f
|
||||
xsave=CetState CetUState|CetSState shstk
|
||||
xsave=AmxState Xtilecfg|Xtiledata amxtile
|
||||
|
||||
# Processor/arch listing below this line
|
||||
# Source: Intel Instruction Set Extension manual, section 1.2
|
||||
# Source: GCC gcc/config/i386/i386.h, i386-c.c, i386-builtins.c
|
||||
# Architecture Based on New features Optional features
|
||||
arch=x86_64 <> sse2
|
||||
# Core line
|
||||
arch=Core2 x86_64 sse3,ssse3,cx16
|
||||
arch=NHM Core2 sse4.1,sse4.2,popcnt
|
||||
arch=WSM NHM
|
||||
arch=SNB WSM avx
|
||||
arch=IVB SNB f16c,rdrnd,fsgsbase
|
||||
arch=HSW IVB avx2,fma,bmi,bmi2,lzcnt,movbe
|
||||
arch=BDW HSW adx,rdseed
|
||||
arch=BDX BDW
|
||||
arch=SKL BDW xsavec,xsaves
|
||||
arch=ADL SKL avxvnni,gfni,vaes,vpclmulqdq,serialize,shstk,cldemote,movdiri,movdir64b,ibt,waitpkg,keylocker rdpid
|
||||
arch=SKX SKL avx512f,avx512dq,avx512cd,avx512bw,avx512vl clwb
|
||||
arch=CLX SKX avx512vnni
|
||||
arch=CPX CLX avx512bf16
|
||||
arch=CNL SKX avx512ifma,avx512vbmi sha
|
||||
arch=ICL CNL avx512vbmi2,gfni,vaes,vpclmulqdq,avx512vnni,avx512bitalg,avx512vpopcntdq fsrm,rdpid
|
||||
arch=ICX ICL pconfig
|
||||
arch=TGL ICL avx512vp2intersect,shstk,,movdiri,movdir64b,ibt,keylocker
|
||||
arch=SPR TGL avx512bf16,amxtile,amxbf16,amxint8,avxvnni,cldemote,pconfig,waitpkg,serialize,tsxldtrk,uintr
|
||||
# Atom line
|
||||
arch=SLM WSM rdrnd,movbe
|
||||
arch=GLM SLM fsgsbase,rdseed,lzcnt,xsavec,xsaves
|
||||
arch=TNT GLM clwb,gfni,cldemote,waitpkg,movdiri,movdir64b
|
||||
# Xeon Phi line
|
||||
#arch=KNL SKL avx512f,avx512er,avx512pf,avx512cd
|
||||
#arch=KNM KNL avx5124fmaps,avx5124vnniw,avx512vpopcntdq
|
||||
# Longer names
|
||||
arch=Nehalem NHM # Intel Core i3/i5/i7
|
||||
arch=Westmere WSM # Intel Core i3/i5/i7
|
||||
arch=SandyBridge SNB # Second Generation Intel Core i3/i5/i7
|
||||
arch=IvyBridge IVB # Third Generation Intel Core i3/i5/i7
|
||||
arch=Haswell HSW # Fourth Generation Intel Core i3/i5/i7
|
||||
arch=Broadwell BDW # Fifth Generation Intel Core i3/i5/i7
|
||||
arch=Skylake SKL # Sixth Generation Intel Core i3/i5/i7
|
||||
arch=Skylake-Avx512 SKX # Intel Xeon Scalable
|
||||
arch=CascadeLake CLX # Second Generation Intel Xeon Scalable
|
||||
arch=CooperLake CPX # Third Generation Intel Xeon Scalable
|
||||
arch=CannonLake CNL # Intel Core i3-8121U
|
||||
arch=IceLake-Client ICL # Tenth Generation Intel Core i3/i5/i7
|
||||
arch=IceLake-Server ICX # Third Generation Intel Xeon Scalable
|
||||
arch=AlderLake ADL
|
||||
arch=SapphireRapids SPR
|
||||
arch=TigerLake TGL # Eleventh Generation Intel Core i3/i5/i7
|
||||
arch=Silvermont SLM
|
||||
arch=Goldmont GLM
|
||||
arch=Tremont TNT
|
||||
#arch=KnightsLanding KNL
|
||||
#arch=KnightsMill KNM
|
329
util/x86simdgen/3rdparty/x86simd_generate.pl
vendored
Executable file
329
util/x86simdgen/3rdparty/x86simd_generate.pl
vendored
Executable file
@ -0,0 +1,329 @@
|
||||
#!/usr/bin/env perl
|
||||
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
use strict;
|
||||
$\ = "\n";
|
||||
$/ = "\n";
|
||||
my $debug = 0;
|
||||
my %leaves = (
|
||||
Leaf01ECX => "CPUID Leaf 1, ECX",
|
||||
Leaf07_00EBX => "CPUID Leaf 7, Sub-leaf 0, EBX",
|
||||
Leaf07_00ECX => "CPUID Leaf 7, Sub-leaf 0, ECX",
|
||||
Leaf07_00EDX => "CPUID Leaf 7, Sub-leaf 0, EDX",
|
||||
Leaf07_01EAX => "CPUID Leaf 7, Sub-leaf 1, EAX",
|
||||
Leaf13_01EAX => "CPUID Leaf 13, Sub-leaf 1, EAX",
|
||||
Leaf80000001hECX => "CPUID Leaf 80000001h, ECX",
|
||||
Leaf80000008hEBX => "CPUID Leaf 80000008h, EBX",
|
||||
);
|
||||
my @leafNames = sort keys %leaves;
|
||||
|
||||
# out of order (we want it first)
|
||||
unshift @leafNames, "Leaf01EDX";
|
||||
$leaves{Leaf01EDX} = "CPUID Leaf 1, EDX";
|
||||
|
||||
# Read input from file specified by first argument
|
||||
my $input_conf_file = shift @ARGV;
|
||||
open(FH, '<', $input_conf_file) or die $!;
|
||||
|
||||
my $i = 0;
|
||||
my @features;
|
||||
my @architecture_names;
|
||||
my %architectures;
|
||||
my @xsaveStates;
|
||||
my $maxarchnamelen = 0;
|
||||
# Parse the configuration read from FH, one record per line.  A '#' starts a
# comment that runs to the end of the line.  Three record kinds exist:
#   arch=NAME BASE FEATURES   - a CPU architecture, derived from BASE ("<>" = none)
#   xsave=NAME VALUE REQUIRED - an XSAVE state bit or a combination of bits
#   NAME LEAF BIT [DEPENDS]   - a CPU feature and the CPUID bit reporting it
while (<FH>) {
    chomp $_;

    # Keep the text of the trailing comment, if any: it is emitted later as
    # documentation for the generated enumerators.  The result of the match is
    # tested explicitly so that a line without a comment yields an empty
    # string instead of whatever $1 happens to hold after a failed match.
    my $comment = m/#\s*(.*)\s*/ ? $1 : "";

    # Strip the comment and leading whitespace; skip lines left empty.
    s/#.*$//;
    s/^\s+//;
    next if $_ eq "";

    if (s/^arch=//) {
        my ($arch, $based, $f) = split /\s+/;
        die("Unknown base architecture \"$based\"")
            unless $based eq "<>" or grep {$_ eq $based} @architecture_names;

        # Identifier used in the generated cpu_* macro names.
        my $id = lc($arch);
        $id =~ s/[^A-Za-z0-9_]/_/g;

        # Human-readable name: "IceLake-Client" becomes "Ice Lake (Client)".
        my $prettyname = $arch;
        $prettyname =~ s/\B([A-Z])/ $1/g;
        $prettyname =~ s/-(\w+)/ ($1)/g;
        $maxarchnamelen = length($prettyname) if length($prettyname) > $maxarchnamelen;

        # An architecture may be listed more than once (for instance when
        # several configuration files are concatenated); the features of a
        # later entry are added to those already recorded.
        my @basefeatures;
        my @extrafeatures;
        @basefeatures = @{$architectures{$based}->{allfeatures}} if $based ne "<>";
        @extrafeatures = @{$architectures{$arch}{features}} if defined($architectures{$arch});
        @extrafeatures = (@extrafeatures, split(',', $f));
        my @allfeatures = sort (@basefeatures, @extrafeatures);

        $architectures{$arch} = {
            name => $arch,
            prettyname => $prettyname,
            id => $id,
            base => $based,
            features => \@extrafeatures,
            allfeatures => \@allfeatures,
            comment => $comment
        };
        push @architecture_names, $arch
            unless grep {$_ eq $arch} @architecture_names;
    } elsif (s/^xsave=//) {
        my ($name, $value, $required) = split /\s+/;
        push @xsaveStates,
            { id => $name, value => $value, required_for => $required, comment => $comment };
    } else {
        my ($name, $function, $bit, $depends) = split /\s+/;
        die("Unknown CPUID function \"$function\"")
            unless grep {$_ eq $function} @leafNames;
        if (my @match = grep { $_->{name} eq $name } @features) {
            die("internal error") if scalar @match != 1;

            # The CPUID function is stored under the key "leaf" (see the push
            # below), so that is the key to compare against.  An exact
            # duplicate is silently accepted; a conflicting one is fatal.
            next if $match[0]->{leaf} eq $function &&
                $match[0]->{bit} eq $bit && $match[0]->{depends} eq $depends;
            die("Duplicate feature \"$name\" with different details. " .
                "Previously was $match[0]->{leaf} bit $match[0]->{bit}.");
        }

        # Identifier used in the generated CpuFeature* / cpu_feature_* names.
        my $id = uc($name);
        $id =~ s/[^A-Z0-9_]/_/g;
        push @features,
            { name => $name, depends => $depends, id => $id, bit => $bit, leaf => $function, comment => $comment };

        # Each feature occupies one bit of a 64-bit mask.
        ++$i;
        die("Too many features to fit a 64-bit integer") if $i > 64;
    }
}
|
||||
close FH;
|
||||
|
||||
# Print the header output
|
||||
my $headername = "";
|
||||
my $headerguard = "";
|
||||
if ($headername = shift @ARGV) {
|
||||
|
||||
$headerguard = uc($headername);
|
||||
$headerguard =~ s/[^A-Z0-9_]/_/g;
|
||||
|
||||
print qq|// This is a generated file. DO NOT EDIT.
|
||||
// Please see $0
|
||||
#ifndef $headerguard
|
||||
#define $headerguard
|
||||
|
||||
#include <stdint.h>|;
|
||||
} else {
|
||||
$debug = 1;
|
||||
}
|
||||
|
||||
# Print the feature list
|
||||
my $lastleaf;
|
||||
for (my $i = 0; $i < scalar @features; ++$i) {
|
||||
my $feature = $features[$i];
|
||||
# Leaf header:
|
||||
printf "\n// in %s:\n", $leaves{$feature->{leaf}}
|
||||
if $feature->{leaf} ne $lastleaf;
|
||||
$lastleaf = $feature->{leaf};
|
||||
|
||||
# Feature
|
||||
printf "#define cpu_feature_%-31s (UINT64_C(1) << %d)\n", lc($feature->{id}), $i;
|
||||
|
||||
# Feature string names for Clang and GCC
|
||||
my $str = $feature->{name} . ',' . $feature->{depends};
|
||||
$str =~ s/,$//;
|
||||
printf "#define QT_FUNCTION_TARGET_STRING_%-17s \"%s\"\n",
|
||||
$feature->{id}, $str;
|
||||
}
|
||||
|
||||
# Print the architecture list
|
||||
print "\n// CPU architectures";
|
||||
for (@architecture_names) {
|
||||
my $arch = $architectures{$_};
|
||||
my $base = $arch->{base};
|
||||
if ($base eq "<>") {
|
||||
$base = "0";
|
||||
} else {
|
||||
$base =~ s/[^A-Za-z0-9_]/_/g;
|
||||
$base = "cpu_" . $base;
|
||||
}
|
||||
|
||||
printf "#define cpu_%-19s (%s", lc($arch->{id}), lc($base);
|
||||
|
||||
for my $f (@{$arch->{features}}) {
|
||||
my @match = grep { $_->{name} eq $f } @features;
|
||||
if (scalar @match == 1) {
|
||||
printf " \\\n%33s| cpu_feature_%s", " ", lc($match[0]->{id});
|
||||
} else {
|
||||
printf STDERR "%s: unknown feature '%s' for CPU '%s'\n", $0, $f, $arch->{name}
|
||||
if $debug;
|
||||
}
|
||||
}
|
||||
print ")";
|
||||
}
|
||||
|
||||
print q{
|
||||
static const uint64_t _compilerCpuFeatures = 0};
|
||||
|
||||
# And print the compiler-enabled features part:
|
||||
for (my $i = 0; $i < scalar @features; ++$i) {
|
||||
my $feature = $features[$i];
|
||||
printf
|
||||
"#ifdef __%s__\n" .
|
||||
" | cpu_feature_%s\n" .
|
||||
"#endif\n",
|
||||
$feature->{id}, lc($feature->{id});
|
||||
}
|
||||
|
||||
print ' ;';
|
||||
if ($headerguard ne "") {
|
||||
print q|
|
||||
#if (defined __cplusplus) && __cplusplus >= 201103L
|
||||
enum X86CpuFeatures : uint64_t {|;
|
||||
|
||||
for (@features) {
|
||||
my $line = sprintf "CpuFeature%s = cpu_feature_%s,", $_->{id}, lc($_->{id});
|
||||
if ($_->{comment} ne "") {
|
||||
printf " %-56s ///< %s\n", $line, $_->{comment};
|
||||
} else {
|
||||
print " $line";
|
||||
}
|
||||
}
|
||||
|
||||
print qq|}; // enum X86CpuFeatures
|
||||
|
||||
enum X86CpuArchitectures : uint64_t {|;
|
||||
|
||||
for (@architecture_names) {
|
||||
my $arch = $architectures{$_};
|
||||
my $name = $arch->{name};
|
||||
$name =~ s/[^A-Za-z0-9]//g;
|
||||
my $line = sprintf "CpuArch%s = cpu_%s,", $name, lc($arch->{id});
|
||||
if ($arch->{comment} ne "") {
|
||||
printf " %-56s ///< %s\n", $line, $arch->{comment};
|
||||
} else {
|
||||
print " $line";
|
||||
}
|
||||
}
|
||||
|
||||
print qq|}; // enum X86cpuArchitectures
|
||||
#endif /* C++11 */\n|;
|
||||
};
|
||||
|
||||
print "// -- implementation start --\n";
|
||||
# Now generate the string table and bit-location array
|
||||
my $offset = 0;
|
||||
my @offsets;
|
||||
print "static const char features_string[] =";
|
||||
for my $feature (@features) {
|
||||
print " \" $feature->{name}\\0\"";
|
||||
push @offsets, $offset;
|
||||
$offset += 2 + length($feature->{name});
|
||||
}
|
||||
print " \"\\0\";";
|
||||
|
||||
# Print the string offset table
|
||||
printf "\nstatic const %s features_indices[] = {",
|
||||
$offset > 255 ? "uint16_t" : "uint8_t";
|
||||
for (my $j = 0; $j < scalar @offsets; ++$j) {
|
||||
printf "%s%3d,",
|
||||
$j % 8 ? " " : "\n ", $offsets[$j];
|
||||
}
|
||||
print "\n};";
|
||||
|
||||
# Print the locator enum and table.
# Each entry of x86_locators encodes where a feature is reported, as
# <index of the CPUID leaf in X86CpuidLeaves> * 32 + <bit number>.
print "\nenum X86CpuidLeaves {";
map { print " $_," } @leafNames;
print " X86CpuidMaxLeaf\n};";

# With more than 8 leaves the largest locator (8*32 + 31) no longer fits in
# 8 bits.  Count the keys explicitly: a hash evaluated in scalar context only
# yields the number of elements on Perl 5.26 and later; older versions return
# a "used/allocated buckets" string whose numeric value is unrelated.
my $type = scalar(keys %leaves) > 8 ? "uint16_t" : "uint8_t";
printf "\nstatic const %s x86_locators[] = {\n",
    $type;
for (my $j = 0; $j < scalar @features; ++$j) {
    my $feature = $features[$j];
    # The padding aligns the trailing "// name" comments in a column.
    printf " %s*32 + %2d, %s// %s\n",
        $feature->{leaf}, $feature->{bit}, ' ' x (24 - length($feature->{leaf})), $feature->{name};
}
print '};';
|
||||
|
||||
# Generate the processor name listing, sorted by feature length
|
||||
my %sorted_archs;
|
||||
for (@architecture_names) {
|
||||
my $arch = $architectures{$_};
|
||||
my $key = sprintf "%02d_%s", scalar(@{$arch->{allfeatures}}), join(',', @{$arch->{allfeatures}});
|
||||
$sorted_archs{$key} = $arch;
|
||||
}
|
||||
print qq|
|
||||
struct X86Architecture
|
||||
{
|
||||
uint64_t features;
|
||||
char name[$maxarchnamelen + 1];
|
||||
};
|
||||
|
||||
static const struct X86Architecture x86_architectures[] = {|;
|
||||
for (sort { $b <=> $a } keys %sorted_archs) {
|
||||
my $arch = $sorted_archs{$_};
|
||||
next if $arch->{base} eq "<>";
|
||||
printf " { cpu_%s, \"%s\" },\n", $arch->{id}, $arch->{prettyname};
|
||||
}
|
||||
print "};";
|
||||
|
||||
# Produce the list of XSAVE states
|
||||
print "\nenum XSaveBits {";
|
||||
my $xsaveEnumPrefix = "XSave_";
|
||||
for my $state (@xsaveStates) {
|
||||
my $value = $state->{value};
|
||||
unless ($value =~ /^0x/) {
|
||||
# Compound value
|
||||
$value = join(" | ", map { $xsaveEnumPrefix . $_ } split(/\|/, $value));
|
||||
}
|
||||
printf " %s%-12s = %s,", $xsaveEnumPrefix, $state->{id}, $value;
|
||||
printf "%s// %s", ' ' x (18 - length($value)), $state->{comment}
|
||||
if $state->{comment} ne '';
|
||||
printf "\n";
|
||||
};
|
||||
print "};";
|
||||
|
||||
# Produce a list of the features that require extended XSAVE state
|
||||
my $xsaveRequirementMapping;
|
||||
for my $state (@xsaveStates) {
|
||||
my $xsaveReqPrefix = "XSaveReq_";
|
||||
my @required_for = split /,/, $state->{required_for};
|
||||
next unless scalar @required_for;
|
||||
|
||||
my $prefix = sprintf "\n// List of features requiring %s%s\nstatic const uint64_t %s%s = 0",
|
||||
$xsaveEnumPrefix, $state->{id}, $xsaveReqPrefix, $state->{id};
|
||||
|
||||
# match either the feature name or one of its requirements against list
|
||||
# of features that this state is required for
|
||||
for my $feature (@features) {
|
||||
my $id = lc($feature->{id});
|
||||
my $required = 0;
|
||||
for my $requirement (@required_for) {
|
||||
my @depends = split /,/, "$id," . $feature->{depends};
|
||||
$required = grep { $_ eq $requirement } @depends;
|
||||
last if $required;
|
||||
}
|
||||
printf "$prefix\n | cpu_feature_%s", $id if $required;
|
||||
$prefix = "" if $required;
|
||||
}
|
||||
|
||||
if ($prefix eq "") {
|
||||
# we printed something
|
||||
print ";";
|
||||
$xsaveRequirementMapping .= sprintf " { %s%s, %s%s },\n",
|
||||
$xsaveReqPrefix, $state->{id}, $xsaveEnumPrefix, $state->{id};
|
||||
}
|
||||
}
|
||||
|
||||
# Finally, make a table mapping each group of CPU features to the XSAVE state
# it requires.  The table is only emitted when at least one mapping was
# produced above.
if ($xsaveRequirementMapping ne "") {
    printf qq|
struct XSaveRequirementMapping
{
uint64_t cpu_features;
uint64_t xsave_state;
};

static const struct XSaveRequirementMapping xsave_requirements[] = {
%s};
|, $xsaveRequirementMapping;
}

# The end marker is always printed: the "-- implementation start --" marker is
# printed unconditionally too, and the two are used as a pair to split the
# output into header and implementation parts.
print "\n// -- implementation end --";

# Close the include guard exactly when it was opened, that is, when a header
# name was given on the command line.
print "#endif /* $headerguard */" if $headerguard ne "";
|
19
util/x86simdgen/Makefile
Normal file
19
util/x86simdgen/Makefile
Normal file
@ -0,0 +1,19 @@
|
||||
GENERATOR = 3rdparty/x86simd_generate.pl
|
||||
TARGETDIR = ../../src/corelib/global/
|
||||
TARGETCPP = qsimd_x86.cpp
|
||||
TARGETHEADER = qsimd_x86_p.h
|
||||
|
||||
CONF_FILES = 3rdparty/simd-intel.conf
|
||||
# We don't currently use any feature from simd-amd.conf
|
||||
# CONF_FILES += 3rdparty/simd-amd.conf
|
||||
|
||||
all: $(TARGETDIR)/$(TARGETHEADER) $(TARGETDIR)/$(TARGETCPP)
|
||||
$(TARGETHEADER): $(CONF_FILES) | $(GENERATOR)
|
||||
cat $^ | perl $(GENERATOR) /dev/stdin $@ > $@
|
||||
$(TARGETDIR)/$(TARGETHEADER): header $(TARGETHEADER)
|
||||
sed '/-- implementation start --/,/-- implementation end --/d' $^ > $@
|
||||
$(TARGETDIR)/$(TARGETCPP): $(TARGETHEADER) header
|
||||
(cat header; echo '#include "$(TARGETHEADER)"'; sed '1,/-- implementation start --/d;/-- implementation end --/,$$d' $<) > $@
|
||||
|
||||
clean:
|
||||
-$(RM) $(TARGETHEADER)
|
13
util/x86simdgen/README.md
Normal file
13
util/x86simdgen/README.md
Normal file
@ -0,0 +1,13 @@
|
||||
# Scripts to regenerate the x86 SIMD flags
|
||||
|
||||
Upstream: https://github.com/opendcdiag/opendcdiag
|
||||
License: Apache-2.0
|
||||
|
||||
The .conf files are meant to be edited and the options we want to use
|
||||
are uncommented.
|
||||
|
||||
To regenerate:
|
||||
make
|
||||
|
||||
Note: the license of the script does not affect the produced output's
|
||||
license. Therefore, no qt_attribution.json file is provided.
|
@ -1,277 +0,0 @@
|
||||
#!/usr/bin/env perl
|
||||
#############################################################################
|
||||
##
|
||||
## Copyright (C) 2018 Intel Corporation.
|
||||
## Contact: https://www.qt.io/licensing/
|
||||
##
|
||||
## This file is part of the build configuration tools of the Qt Toolkit.
|
||||
##
|
||||
## $QT_BEGIN_LICENSE:MIT$
|
||||
## Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
## of this software and associated documentation files (the "Software"), to deal
|
||||
## in the Software without restriction, including without limitation the rights
|
||||
## to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
## copies of the Software, and to permit persons to whom the Software is
|
||||
## furnished to do so, subject to the following conditions:
|
||||
##
|
||||
## The above copyright notice and this permission notice shall be included in
|
||||
## all copies or substantial portions of the Software.
|
||||
##
|
||||
## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
## AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
## LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
## OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
## THE SOFTWARE.
|
||||
## $QT_END_LICENSE$
|
||||
##
|
||||
#############################################################################
|
||||
|
||||
use strict;
|
||||
$\ = "\n";
|
||||
$/ = "\n";
|
||||
my %leaves = (
|
||||
Leaf1EDX => "CPUID Leaf 1, EDX",
|
||||
Leaf1ECX => "CPUID Leaf 1, ECX",
|
||||
Leaf7_0EBX => "CPUID Leaf 7, Sub-leaf 0, EBX",
|
||||
Leaf7_0ECX => "CPUID Leaf 7, Sub-leaf 0, ECX",
|
||||
Leaf7_0EDX => "CPUID Leaf 7, Sub-leaf 0, EDX",
|
||||
);
|
||||
my @leafNames = sort keys %leaves;
|
||||
|
||||
# Read data from stdin
|
||||
my $i = 1;
|
||||
my @features;
|
||||
while (<STDIN>) {
|
||||
s/#.*$//;
|
||||
chomp;
|
||||
next if $_ eq "";
|
||||
|
||||
my ($name, $function, $bit, $depends) = split /\s+/;
|
||||
die("Unknown CPUID function \"$function\"")
|
||||
unless grep $function, @leafNames;
|
||||
|
||||
my $id = uc($name);
|
||||
$id =~ s/[^A-Z0-9_]/_/g;
|
||||
push @features,
|
||||
{ name => $name, depends => $depends, id => $id, bit => $bit, leaf => $function };
|
||||
++$i;
|
||||
}
|
||||
|
||||
if (my $h = shift @ARGV) {
|
||||
open HEADER, ">", $h;
|
||||
select HEADER;
|
||||
}
|
||||
|
||||
# Print the qsimd_x86_p.h output
|
||||
print q{/****************************************************************************
|
||||
**
|
||||
** Copyright (C) 2018 Intel Corporation.
|
||||
** Contact: https://www.qt.io/licensing/
|
||||
**
|
||||
** This file is part of the QtCore module of the Qt Toolkit.
|
||||
**
|
||||
** $QT_BEGIN_LICENSE:LGPL$
|
||||
** Commercial License Usage
|
||||
** Licensees holding valid commercial Qt licenses may use this file in
|
||||
** accordance with the commercial license agreement provided with the
|
||||
** Software or, alternatively, in accordance with the terms contained in
|
||||
** a written agreement between you and The Qt Company. For licensing terms
|
||||
** and conditions see https://www.qt.io/terms-conditions. For further
|
||||
** information use the contact form at https://www.qt.io/contact-us.
|
||||
**
|
||||
** GNU Lesser General Public License Usage
|
||||
** Alternatively, this file may be used under the terms of the GNU Lesser
|
||||
** General Public License version 3 as published by the Free Software
|
||||
** Foundation and appearing in the file LICENSE.LGPL3 included in the
|
||||
** packaging of this file. Please review the following information to
|
||||
** ensure the GNU Lesser General Public License version 3 requirements
|
||||
** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
|
||||
**
|
||||
** GNU General Public License Usage
|
||||
** Alternatively, this file may be used under the terms of the GNU
|
||||
** General Public License version 2.0 or (at your option) the GNU General
|
||||
** Public license version 3 or any later version approved by the KDE Free
|
||||
** Qt Foundation. The licenses are as published by the Free Software
|
||||
** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
|
||||
** included in the packaging of this file. Please review the following
|
||||
** information to ensure the GNU General Public License requirements will
|
||||
** be met: https://www.gnu.org/licenses/gpl-2.0.html and
|
||||
** https://www.gnu.org/licenses/gpl-3.0.html.
|
||||
**
|
||||
** $QT_END_LICENSE$
|
||||
**
|
||||
****************************************************************************/
|
||||
|
||||
// This is a generated file. DO NOT EDIT.
|
||||
// Please see util/x86simdgen/generate.pl";
|
||||
#ifndef QSIMD_P_H
|
||||
# error "Please include <private/qsimd_p.h> instead"
|
||||
#endif
|
||||
#ifndef QSIMD_X86_P_H
|
||||
#define QSIMD_X86_P_H
|
||||
|
||||
#include "qsimd_p.h"
|
||||
|
||||
//
|
||||
// W A R N I N G
|
||||
// -------------
|
||||
//
|
||||
// This file is not part of the Qt API. It exists purely as an
|
||||
// implementation detail. This header file may change from version to
|
||||
// version without notice, or even be removed.
|
||||
//
|
||||
// We mean it.
|
||||
//
|
||||
|
||||
QT_BEGIN_NAMESPACE
|
||||
|
||||
// used only to indicate that the CPU detection was initialized
|
||||
#define QSimdInitialized (Q_UINT64_C(1) << 0)};
|
||||
|
||||
# Print the enum: for every entry of @features emit a CpuFeature<ID> bit macro
# and the matching QT_FUNCTION_TARGET_STRING_<ID> macro, preceded by a
# "// in <leaf>:" comment each time the CPUID leaf changes.
my $lastleaf;
for my $i (0 .. $#features) {
    my $feature = $features[$i];
    my $leaf = $feature->{leaf};

    # Leaf header: printed for the first feature and on every leaf change.
    # $lastleaf is still undefined on the first pass, so check definedness
    # before comparing instead of handing undef to "ne".
    printf "\n// in %s:\n", $leaves{$leaf}
        if !defined($lastleaf) || $leaf ne $lastleaf;
    $lastleaf = $leaf;

    # Feature bit: bit 0 is used by QSimdInitialized, so features start at 1.
    printf "#define CpuFeature%-33s (Q_UINT64_C(1) << %d)\n", $feature->{id}, $i + 1;

    # Feature string names for Clang and GCC; a required feature, when
    # present, is appended after a comma.
    my $str = $feature->{name};
    $str .= ",$feature->{depends}" if defined($feature->{depends});
    printf "#define QT_FUNCTION_TARGET_STRING_%-17s \"%s\"\n",
        $feature->{id}, $str;
}
|
||||
|
||||
# Open the qCompilerCpuFeatures initializer. The text ends with a newline so
# that the first "#ifdef" printed below starts on a line of its own; a
# preprocessor directive glued to "= 0" would not compile.
print q{
static const quint64 qCompilerCpuFeatures = 0
};

# And print the compiler-enabled features part: each feature contributes its
# bit only when the compiler predefines the corresponding __<ID>__ macro.
for my $feature (@features) {
    my $id = $feature->{id};
    printf "#ifdef __%s__\n | CpuFeature%s\n#endif\n", $id, $id;
}

# Terminate the initializer and close the namespace and the include guard.
print q{ ;

QT_END_NAMESPACE

#endif // QSIMD_X86_P_H
};
|
||||
|
||||
# Decide where the remaining output goes. When another argument is left on
# the command line it names the file to write: open it and make it the default
# output handle. Otherwise keep printing to the current handle and emit a
# separator telling the user where the second part begins.
if (my $cpp = shift @ARGV) {
    # Fail loudly: selecting a handle that never opened would silently drop
    # everything printed from here on.
    open(CPP, ">", $cpp)
        or die "Cannot open $cpp for writing: $!\n";
    select CPP;
} else {
    print q{

---- cut here, paste the rest into qsimd_x86.cpp ---


};
}
|
||||
|
||||
# Emit the prologue of the generated source: licence header, the
# "generated file" banner and the include. q{} does not interpolate, so the
# $QT_..._LICENSE$ markers are printed literally.
print q{/****************************************************************************
**
** Copyright (C) 2018 Intel Corporation.
** Contact: https://www.qt.io/licensing/
**
** This file is part of the QtCore module of the Qt Toolkit.
**
** $QT_BEGIN_LICENSE:LGPL$
** Commercial License Usage
** Licensees holding valid commercial Qt licenses may use this file in
** accordance with the commercial license agreement provided with the
** Software or, alternatively, in accordance with the terms contained in
** a written agreement between you and The Qt Company. For licensing terms
** and conditions see https://www.qt.io/terms-conditions. For further
** information use the contact form at https://www.qt.io/contact-us.
**
** GNU Lesser General Public License Usage
** Alternatively, this file may be used under the terms of the GNU Lesser
** General Public License version 3 as published by the Free Software
** Foundation and appearing in the file LICENSE.LGPL3 included in the
** packaging of this file. Please review the following information to
** ensure the GNU Lesser General Public License version 3 requirements
** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
**
** GNU General Public License Usage
** Alternatively, this file may be used under the terms of the GNU
** General Public License version 2.0 or (at your option) the GNU General
** Public license version 3 or any later version approved by the KDE Free
** Qt Foundation. The licenses are as published by the Free Software
** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
** included in the packaging of this file. Please review the following
** information to ensure the GNU General Public License requirements will
** be met: https://www.gnu.org/licenses/gpl-2.0.html and
** https://www.gnu.org/licenses/gpl-3.0.html.
**
** $QT_END_LICENSE$
**
****************************************************************************/

// This is a generated file. DO NOT EDIT.
// Please see util/x86simdgen/generate.pl
#include "qsimd_p.h"
};
|
||||
|
||||
# Now generate the string table and bit-location array.
# Every feature becomes one " <name>\0" piece of the features_string literal;
# @offsets records where each piece starts and $offset ends up as the start of
# the closing empty entry.
my $offset = 0;
my @offsets;
print "static const char features_string[] =";
foreach my $entry (@features) {
    my $name = $entry->{name};
    push @offsets, $offset;
    print ' " ' . $name . '\0"';
    # leading space + name + NUL terminator
    $offset += length($name) + 2;
}
print ' "\0";';
|
||||
|
||||
# Print the string offset table. The first element is the total $offset,
# which is also the largest value stored and therefore decides whether
# 8-bit entries are enough. The recorded offsets follow, with a line break
# before every eighth one.
printf "\nstatic const %s features_indices[] = {\n %3d",
    ($offset > 255 ? "quint16" : "quint8"),
    $offset;
foreach my $position (1 .. scalar @offsets) {
    if ($position % 8) {
        printf ",%s%3d", " ", $offsets[$position - 1];
    } else {
        printf ",%s%3d", "\n ", $offsets[$position - 1];
    }
}
print "\n};";
|
||||
|
||||
# Print the locator enum and table.
# The enum lists every name in @leafNames in order and ends with the
# X86CpuidMaxLeaf terminator.
print "\nenum X86CpuidLeaves {";
foreach my $leafName (@leafNames) {
    print " $leafName,";
}
print " X86CpuidMaxLeaf\n};";
|
||||
|
||||
# Choose the element type of the locator table. Each entry is printed as
# <leaf>*32 + <bit>, so with more than 8 leaves the value may exceed 8 bits.
# Count the keys explicitly: a bare hash in scalar context only yields the key
# count on Perl 5.26 and later; older versions return a "used/total" bucket
# string instead.
my $type = scalar(keys %leaves) > 8 ? "quint16" : "quint8";
printf "\nstatic const %s x86_locators[] = {", $type;
my $lastname;
for my $feature (@features) {
    # Finish the previous entry: its comma plus a comment naming the feature.
    printf ", // %s", $lastname
        if defined($lastname);
    printf "\n %s*32 + %2d",
        $feature->{leaf}, $feature->{bit};
    $lastname = $feature->{name};
}

# Comment for the last entry, end of the table and the start of the AllAVX512
# initializer. Plain print rather than printf: the text embeds $lastname and
# must not be interpreted as a format string.
print qq{ // $lastname
\};

// List of AVX512 features (see detectProcessorFeatures())
static const quint64 AllAVX512 = 0};
|
||||
|
||||
# Print AVX512 features: append " | CpuFeature<ID>" for every feature whose id
# contains "AVX512", then terminate the statement. The id is matched through a
# lexical variable so the global $_ is left untouched.
for my $feature (@features) {
    my $id = $feature->{id};
    printf "\n | CpuFeature%s", $id if $id =~ /AVX512/;
}
print ";";
|
39
util/x86simdgen/header
Normal file
39
util/x86simdgen/header
Normal file
@ -0,0 +1,39 @@
|
||||
/****************************************************************************
|
||||
**
|
||||
** Copyright (C) 2022 Intel Corporation.
|
||||
** Contact: https://www.qt.io/licensing/
|
||||
**
|
||||
** This file is part of the QtCore module of the Qt Toolkit.
|
||||
**
|
||||
** $QT_BEGIN_LICENSE:LGPL$
|
||||
** Commercial License Usage
|
||||
** Licensees holding valid commercial Qt licenses may use this file in
|
||||
** accordance with the commercial license agreement provided with the
|
||||
** Software or, alternatively, in accordance with the terms contained in
|
||||
** a written agreement between you and The Qt Company. For licensing terms
|
||||
** and conditions see https://www.qt.io/terms-conditions. For further
|
||||
** information use the contact form at https://www.qt.io/contact-us.
|
||||
**
|
||||
** GNU Lesser General Public License Usage
|
||||
** Alternatively, this file may be used under the terms of the GNU Lesser
|
||||
** General Public License version 3 as published by the Free Software
|
||||
** Foundation and appearing in the file LICENSE.LGPL3 included in the
|
||||
** packaging of this file. Please review the following information to
|
||||
** ensure the GNU Lesser General Public License version 3 requirements
|
||||
** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
|
||||
**
|
||||
** GNU General Public License Usage
|
||||
** Alternatively, this file may be used under the terms of the GNU
|
||||
** General Public License version 2.0 or (at your option) the GNU General
|
||||
** Public license version 3 or any later version approved by the KDE Free
|
||||
** Qt Foundation. The licenses are as published by the Free Software
|
||||
** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
|
||||
** included in the packaging of this file. Please review the following
|
||||
** information to ensure the GNU General Public License requirements will
|
||||
** be met: https://www.gnu.org/licenses/gpl-2.0.html and
|
||||
** https://www.gnu.org/licenses/gpl-3.0.html.
|
||||
**
|
||||
** $QT_END_LICENSE$
|
||||
**
|
||||
****************************************************************************/
|
||||
|
@ -1,37 +0,0 @@
|
||||
# Feature CPUID function Bit Required feature
|
||||
sse2 Leaf1EDX 26
|
||||
sse3 Leaf1ECX 0
|
||||
ssse3 Leaf1ECX 9
|
||||
fma Leaf1ECX 12
|
||||
sse4.1 Leaf1ECX 19
|
||||
sse4.2 Leaf1ECX 20
|
||||
movbe Leaf1ECX 22
|
||||
popcnt Leaf1ECX 23
|
||||
aes Leaf1ECX 25 sse4.2
|
||||
avx Leaf1ECX 28
|
||||
f16c Leaf1ECX 29
|
||||
rdrnd Leaf1ECX 30
|
||||
bmi Leaf7_0EBX 3
|
||||
hle Leaf7_0EBX 4
|
||||
avx2 Leaf7_0EBX 5
|
||||
bmi2 Leaf7_0EBX 8
|
||||
rtm Leaf7_0EBX 11
|
||||
avx512f Leaf7_0EBX 16
|
||||
avx512dq Leaf7_0EBX 17
|
||||
rdseed Leaf7_0EBX 18
|
||||
avx512ifma Leaf7_0EBX 21
|
||||
avx512pf Leaf7_0EBX 26
|
||||
avx512er Leaf7_0EBX 27
|
||||
avx512cd Leaf7_0EBX 28
|
||||
sha Leaf7_0EBX 29
|
||||
avx512bw Leaf7_0EBX 30
|
||||
avx512vl Leaf7_0EBX 31
|
||||
avx512vbmi Leaf7_0ECX 1
|
||||
avx512vbmi2 Leaf7_0ECX 6
|
||||
gfni Leaf7_0ECX 8
|
||||
vaes Leaf7_0ECX 9
|
||||
avx512vnni Leaf7_0ECX 11
|
||||
avx512bitalg Leaf7_0ECX 12
|
||||
avx512vpopcntdq Leaf7_0ECX 14
|
||||
avx5124nniw Leaf7_0EDX 2
|
||||
avx5124fmaps Leaf7_0EDX 3
|
Loading…
x
Reference in New Issue
Block a user