Expand reporting of the Intel instruction set extensions

Detection for most of them is free because we already load the entire
CPUID registers anyway. The only exception is AVX512VBMI, which is reported
in a register we had not yet read from (ECX of CPUID level 7, leaf 0).

I've also added the new GCC names so they can be used with
QT_FUNCTION_TARGET. The only two exceptions are "movbe" and "popcnt",
which are extremely restricted in use and we are not likely to have code
dedicated to using them.

Change-Id: Ib306f8f647014b399b87ffff13f1d8fd29e58be0
Reviewed-by: Oswald Buddenhagen <oswald.buddenhagen@theqtcompany.com>
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
This commit is contained in:
Thiago Macieira 2015-07-17 14:25:37 -07:00
parent 6a8251a89b
commit 0829baf902
2 changed files with 155 additions and 13 deletions

View File

@ -205,21 +205,24 @@ static void cpuidFeatures01(uint &ecx, uint &edx)
inline void __cpuidex(int info[4], int, __int64) { memset(info, 0, 4*sizeof(int));}
#endif
// Executes CPUID with EAX = 7 and ECX = 0 and stores the resulting EBX and
// ECX register values in the reference parameters.
// If neither Q_CC_GNU nor Q_OS_WIN is defined, no branch is compiled and the
// outputs are left unmodified.
// (This is a diff view: the old one-output signature and constraint lines are
// shown next to the new two-output ones.)
static void cpuidFeatures07_00(uint &ebx)
static void cpuidFeatures07_00(uint &ebx, uint &ecx)
{
#if defined(Q_CC_GNU)
qregisteruint rbx; // in case it's 64-bit
qregisteruint rcx = 0; // passed in as ECX (value 0), receives ECX on output
// PICreg is exchanged with operand %0 before and after cpuid, so PICreg ends
// up with its original value and %0 (rbx) with whatever cpuid left in PICreg.
// presumably PICreg names EBX/RBX (it is defined outside this view) -- confirm.
// NOTE(review): cpuid also writes EAX, but "a" (7) is declared input-only;
// confirm the compiler cannot rely on EAX still holding 7 afterwards.
asm ("xchg " PICreg", %0\n"
"cpuid\n"
"xchg " PICreg", %0\n"
: "=&r" (rbx)
: "a" (7), "c" (0)
: "=&r" (rbx), "+&c" (rcx)
: "a" (7)
: "%edx");
ebx = rbx;
ecx = rcx;
#elif defined(Q_OS_WIN)
// __cpuidex fills info[] in the order EAX, EBX, ECX, EDX
int info[4];
__cpuidex(info, 7, 0);
ebx = info[1];
ecx = info[2];
#endif
}
@ -257,7 +260,12 @@ static quint64 detectProcessorFeatures()
AVXState = XMM0_15 | YMM0_15Hi128,
AVX512State = AVXState | OpMask | ZMM0_15Hi256 | ZMM16_31
};
static const quint64 AllAVX2 = (Q_UINT64_C(1) << CpuFeatureAVX2);
static const quint64 AllAVX512 = (Q_UINT64_C(1) << CpuFeatureAVX512F) | (Q_UINT64_C(1) << CpuFeatureAVX512CD) |
(Q_UINT64_C(1) << CpuFeatureAVX512ER) | (Q_UINT64_C(1) << CpuFeatureAVX512PF) |
(Q_UINT64_C(1) << CpuFeatureAVX512BW) | (Q_UINT64_C(1) << CpuFeatureAVX512DQ) |
(Q_UINT64_C(1) << CpuFeatureAVX512VL) |
(Q_UINT64_C(1) << CpuFeatureAVX512IFMA) | (Q_UINT64_C(1) << CpuFeatureAVX512VBMI);
static const quint64 AllAVX2 = (Q_UINT64_C(1) << CpuFeatureAVX2) | AllAVX512;
static const quint64 AllAVX = (Q_UINT64_C(1) << CpuFeatureAVX) | AllAVX2;
quint64 features = 0;
@ -295,8 +303,9 @@ static quint64 detectProcessorFeatures()
}
uint cpuid0700EBX = 0;
uint cpuid0700ECX = 0;
if (cpuidLevel >= 7) {
cpuidFeatures07_00(cpuid0700EBX);
cpuidFeatures07_00(cpuid0700EBX, cpuid0700ECX);
// the high 32-bits of features is cpuid0700EBX
features |= quint64(cpuid0700EBX) << 32;
@ -305,6 +314,15 @@ static quint64 detectProcessorFeatures()
if ((xgetbvA & AVXState) != AVXState) {
// support for YMM registers is disabled, disable all AVX
features &= ~AllAVX;
} else if ((xgetbvA & AVX512State) != AVX512State) {
// support for ZMM registers or mask registers is disabled, disable all AVX512
features &= ~AllAVX512;
} else {
// this feature is out of order
if (cpuid0700ECX & (1u << 1))
features |= Q_UINT64_C(1) << CpuFeatureAVX512VBMI;
else
features &= ~(Q_UINT64_C(1) << CpuFeatureAVX512VBMI);
}
return features;
@ -484,7 +502,7 @@ static const int features_indices[] = {
/* Data:
sse3
sse2
avx512vbmi
@ -526,10 +544,31 @@ static const int features_indices[] = {
rtm
avx512f
avx512dq
rdseed
avx512ifma
avx512pf
avx512er
avx512cd
sha
avx512bw
avx512vl
*/
static const char features_string[] =
" sse3\0"
" sse2\0"
" avx512vbmi\0"
" ssse3\0"
" fma\0"
" cmpxchg16b\0"
@ -546,15 +585,27 @@ static const char features_string[] =
" avx2\0"
" bmi2\0"
" rtm\0"
" avx512f\0"
" avx512dq\0"
" rdseed\0"
" avx512ifma\0"
" avx512pf\0"
" avx512er\0"
" avx512cd\0"
" sha\0"
" avx512bw\0"
" avx512vl\0"
"\0";
// Byte offsets into features_string, one entry per feature bit number
// (entry 2 = " avx512vbmi" at offset 12, entry 48 = " avx512f" at offset 130).
// Offset 5 is the NUL that terminates " sse3", so every bit without a name
// maps to an empty string.
// (This is a diff view: the first six rows are the previous table, the
// remaining eight rows are the table after the change.)
static const quint8 features_indices[] = {
0, 6, 5, 5, 5, 5, 5, 5,
5, 12, 5, 5, 19, 24, 5, 5,
5, 5, 5, 36, 44, 5, 52, 59,
5, 67, 5, 5, 72, 77, 83, 5,
5, 5, 5, 91, 96, 101, 5, 5,
107, 5, 5, 113
0, 6, 12, 5, 5, 5, 5, 5,
5, 24, 5, 5, 31, 36, 5, 5,
5, 5, 5, 48, 56, 5, 64, 71,
5, 79, 5, 5, 84, 89, 95, 5,
5, 5, 5, 103, 108, 113, 5, 5,
119, 5, 5, 125, 5, 5, 5, 5,
130, 139, 149, 5, 5, 157, 5, 5,
5, 5, 169, 179, 189, 199, 204, 214
};
#else
static const char features_string[] = "";

View File

@ -218,6 +218,23 @@
# endif
#endif
// GCC target names for the newer instruction set extensions, provided so the
// features can be used with QT_FUNCTION_TARGET. There are intentionally no
// entries for "movbe" and "popcnt".
#define QT_FUNCTION_TARGET_STRING_AVX512F "avx512f"
#define QT_FUNCTION_TARGET_STRING_AVX512CD "avx512cd"
#define QT_FUNCTION_TARGET_STRING_AVX512ER "avx512er"
#define QT_FUNCTION_TARGET_STRING_AVX512PF "avx512pf"
#define QT_FUNCTION_TARGET_STRING_AVX512BW "avx512bw"
#define QT_FUNCTION_TARGET_STRING_AVX512DQ "avx512dq"
#define QT_FUNCTION_TARGET_STRING_AVX512VL "avx512vl"
#define QT_FUNCTION_TARGET_STRING_AVX512IFMA "avx512ifma"
#define QT_FUNCTION_TARGET_STRING_AVX512VBMI "avx512vbmi"
#define QT_FUNCTION_TARGET_STRING_F16C "f16c"
// note: the target string is spelled "rdrnd", unlike the RDRAND macro suffix
#define QT_FUNCTION_TARGET_STRING_RDRAND "rdrnd"
#define QT_FUNCTION_TARGET_STRING_BMI "bmi"
#define QT_FUNCTION_TARGET_STRING_BMI2 "bmi2"
#define QT_FUNCTION_TARGET_STRING_RDSEED "rdseed"
#define QT_FUNCTION_TARGET_STRING_SHA "sha"
// other x86 intrinsics
#if defined(Q_PROCESSOR_X86) && ((defined(Q_CC_GNU) && (Q_CC_GNU >= 404)) \
|| (defined(Q_CC_CLANG) && (Q_CC_CLANG >= 208)) \
@ -260,23 +277,97 @@ enum CPUFeatures {
CpuFeatureSSSE3 = (0 + 9),
CpuFeatureSSE4_1 = (0 + 19),
CpuFeatureSSE4_2 = (0 + 20),
CpuFeatureMOVBE = (0 + 22),
CpuFeaturePOPCNT = (0 + 23),
CpuFeatureAES = (0 + 25),
CpuFeatureAVX = (0 + 28),
CpuFeatureF16C = (0 + 29),
CpuFeatureRDRAND = (0 + 30),
// 31 is always zero and we've used it for the QSimdInitialized
// in level 7, leaf 0, EBX
CpuFeatureBMI = (32 + 3),
CpuFeatureHLE = (32 + 4),
CpuFeatureAVX2 = (32 + 5),
CpuFeatureBMI2 = (32 + 8),
CpuFeatureRTM = (32 + 11),
CpuFeatureAVX512F = (32 + 16),
CpuFeatureAVX512DQ = (32 + 17),
CpuFeatureRDSEED = (32 + 18),
CpuFeatureAVX512IFMA = (32 + 21),
CpuFeatureAVX512PF = (32 + 26),
CpuFeatureAVX512ER = (32 + 27),
CpuFeatureAVX512CD = (32 + 28),
CpuFeatureSHA = (32 + 29),
CpuFeatureAVX512BW = (32 + 30),
CpuFeatureAVX512VL = (32 + 31),
// in level 7, leaf 0, ECX (out of order, for now)
CpuFeatureAVX512VBMI = 2, // uses the bit for DTES64
#endif
// used only to indicate that the CPU detection was initialised
QSimdInitialized = 0x80000000
};
static const uint qCompilerCpuFeatures = 0
static const quint64 qCompilerCpuFeatures = 0
#if defined __SHA__
| (Q_UINT64_C(1) << CpuFeatureSHA)
#endif
#if defined __AES__
| (Q_UINT64_C(1) << CpuFeatureAES)
#endif
#if defined __RTM__
| (Q_UINT64_C(1) << CpuFeatureRTM)
#endif
#ifdef __RDRND__
| (Q_UINT64_C(1) << CpuFeatureRDRAND)
#endif
#ifdef __RDSEED__
| (Q_UINT64_C(1) << CpuFeatureRDSEED)
#endif
#if defined __BMI__
| (Q_UINT64_C(1) << CpuFeatureBMI)
#endif
#if defined __BMI2__
| (Q_UINT64_C(1) << CpuFeatureBMI2)
#endif
#if defined __F16C__
| (Q_UINT64_C(1) << CpuFeatureF16C)
#endif
#if defined __POPCNT__
| (Q_UINT64_C(1) << CpuFeaturePOPCNT)
#endif
#if defined __MOVBE__ // GCC and Clang don't seem to define this
| (Q_UINT64_C(1) << CpuFeatureMOVBE)
#endif
#if defined __AVX512F__
| (Q_UINT64_C(1) << CpuFeatureAVX512F)
#endif
#if defined __AVX512CD__
| (Q_UINT64_C(1) << CpuFeatureAVX512CD)
#endif
#if defined __AVX512ER__
| (Q_UINT64_C(1) << CpuFeatureAVX512ER)
#endif
#if defined __AVX512PF__
| (Q_UINT64_C(1) << CpuFeatureAVX512PF)
#endif
#if defined __AVX512BW__
| (Q_UINT64_C(1) << CpuFeatureAVX512BW)
#endif
#if defined __AVX512DQ__
| (Q_UINT64_C(1) << CpuFeatureAVX512DQ)
#endif
#if defined __AVX512VL__
| (Q_UINT64_C(1) << CpuFeatureAVX512VL)
#endif
#if defined __AVX512IFMA__
| (Q_UINT64_C(1) << CpuFeatureAVX512IFMA)
#endif
#if defined __AVX512VBMI__
| (Q_UINT64_C(1) << CpuFeatureAVX512VBMI)
#endif
#if defined __AVX2__
| (Q_UINT64_C(1) << CpuFeatureAVX2)
#endif