Adds runtime CPU detection for Windows and macOS, and switches feature detection of AES to runtime, as is already done for x86. So far this is done only on ARM64, since GCC doesn't do function versioning on ARM32; Clang can, so it could be added later. Change-Id: Ibe5d60f48cdae3e366a8ecd6263534ba2b09b131 Reviewed-by: Tor Arne Vestbø <tor.arne.vestbo@qt.io> Reviewed-by: Alexandru Croitor <alexandru.croitor@qt.io>
784 lines
24 KiB
C++
784 lines
24 KiB
C++
/****************************************************************************
|
|
**
|
|
** Copyright (C) 2016 The Qt Company Ltd.
|
|
** Copyright (C) 2019 Intel Corporation.
|
|
** Contact: https://www.qt.io/licensing/
|
|
**
|
|
** This file is part of the QtCore module of the Qt Toolkit.
|
|
**
|
|
** $QT_BEGIN_LICENSE:LGPL$
|
|
** Commercial License Usage
|
|
** Licensees holding valid commercial Qt licenses may use this file in
|
|
** accordance with the commercial license agreement provided with the
|
|
** Software or, alternatively, in accordance with the terms contained in
|
|
** a written agreement between you and The Qt Company. For licensing terms
|
|
** and conditions see https://www.qt.io/terms-conditions. For further
|
|
** information use the contact form at https://www.qt.io/contact-us.
|
|
**
|
|
** GNU Lesser General Public License Usage
|
|
** Alternatively, this file may be used under the terms of the GNU Lesser
|
|
** General Public License version 3 as published by the Free Software
|
|
** Foundation and appearing in the file LICENSE.LGPL3 included in the
|
|
** packaging of this file. Please review the following information to
|
|
** ensure the GNU Lesser General Public License version 3 requirements
|
|
** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
|
|
**
|
|
** GNU General Public License Usage
|
|
** Alternatively, this file may be used under the terms of the GNU
|
|
** General Public License version 2.0 or (at your option) the GNU General
|
|
** Public license version 3 or any later version approved by the KDE Free
|
|
** Qt Foundation. The licenses are as published by the Free Software
|
|
** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
|
|
** included in the packaging of this file. Please review the following
|
|
** information to ensure the GNU General Public License requirements will
|
|
** be met: https://www.gnu.org/licenses/gpl-2.0.html and
|
|
** https://www.gnu.org/licenses/gpl-3.0.html.
|
|
**
|
|
** $QT_END_LICENSE$
|
|
**
|
|
****************************************************************************/
|
|
|
|
// we need ICC to define the prototype for _rdseed64_step
|
|
#define __INTEL_COMPILER_USE_INTRINSIC_PROTOTYPES
|
|
|
|
#include "qsimd_p.h"
|
|
#include "qalgorithms.h"
|
|
#include <QByteArray>
|
|
#include <stdio.h>
|
|
|
|
#ifdef Q_OS_LINUX
|
|
# include "../testlib/3rdparty/valgrind_p.h"
|
|
#endif
|
|
|
|
#if defined(Q_OS_WIN)
|
|
# if !defined(Q_CC_GNU)
|
|
# include <intrin.h>
|
|
# endif
|
|
# if defined(Q_PROCESSOR_ARM64)
|
|
# include <processthreadsapi.h>
|
|
# endif
|
|
#elif defined(Q_OS_LINUX) && defined(Q_PROCESSOR_MIPS_32)
|
|
# include "private/qcore_unix_p.h"
|
|
#elif QT_CONFIG(getauxval) && defined(Q_PROCESSOR_ARM)
|
|
# include <sys/auxv.h>
|
|
|
|
// the kernel header definitions for HWCAP_*
|
|
// (the ones we need/may need anyway)
|
|
|
|
// copied from <asm/hwcap.h> (ARM)
|
|
#define HWCAP_NEON 4096
|
|
|
|
// copied from <asm/hwcap.h> (ARM):
|
|
#define HWCAP2_AES (1 << 0)
|
|
#define HWCAP2_CRC32 (1 << 4)
|
|
|
|
// copied from <asm/hwcap.h> (Aarch64)
|
|
#define HWCAP_AES (1 << 3)
|
|
#define HWCAP_CRC32 (1 << 7)
|
|
|
|
// copied from <linux/auxvec.h>
|
|
#define AT_HWCAP 16 /* arch dependent hints at CPU capabilities */
|
|
#define AT_HWCAP2 26 /* extension of AT_HWCAP */
|
|
|
|
#elif defined(Q_CC_GHS)
|
|
# include <INTEGRITY_types.h>
|
|
#elif defined(Q_OS_DARWIN) && defined(Q_PROCESSOR_ARM)
|
|
# include <sys/sysctl.h>
|
|
#endif
|
|
|
|
QT_BEGIN_NAMESPACE
|
|
|
|
#if defined(Q_PROCESSOR_ARM)
|
|
/*
 * Feature-name table for ARM.
 *
 * Each name is stored with a leading space and its own NUL terminator.
 * features_indices[i] is the byte offset inside features_string of the name
 * that belongs to feature bit i; entry 0 designates the empty string.
 *
 * Data (in bit order): neon, crc32, aes
 */
static const char features_string[] =
    "\0"            // offset  0: no name
    " neon\0"       // offset  1
    " crc32\0"      // offset  7
    " aes\0";       // offset 14
static const int features_indices[] = {
    0,
    1,
    7,
    14
};
|
|
#elif defined(Q_PROCESSOR_MIPS)
|
|
/*
 * Feature-name table for MIPS.
 *
 * Each name is stored with a leading space and its own NUL terminator.
 * features_indices[i] is the byte offset inside features_string of the name
 * that belongs to feature bit i; entry 0 designates the empty string.
 *
 * Data (in bit order): dsp, dspr2
 */
static const char features_string[] =
    "\0"            // offset 0: no name
    " dsp\0"        // offset 1
    " dspr2\0";     // offset 6

static const int features_indices[] = { 0, 1, 6 };
|
|
#elif defined(Q_PROCESSOR_X86)
|
|
# include "qsimd_x86.cpp" // generated by util/x86simdgen
|
|
#else
|
|
// Feature-name table for architectures without any detectable feature.
// An empty initializer for an array of unknown bound is not valid ISO C++
// (it only works as a zero-length-array extension), so the table carries a
// single entry that designates the empty name, exactly like entry 0 of the
// tables of the other architectures.
static const char features_string[] = "";
static const int features_indices[] = { 0 };
|
|
#endif
|
|
// end generated
|
|
|
|
#if defined (Q_OS_NACL)
|
|
static inline uint detectProcessorFeatures()
|
|
{
|
|
return 0;
|
|
}
|
|
#elif defined(Q_PROCESSOR_ARM)
|
|
/*
 * Detects the ARM CPU features and returns them as a mask of CpuFeature* bits.
 *
 * Where the information comes from depends on the build:
 *  - with getauxval(): the AT_HWCAP (and, on ARM32, AT_HWCAP2) words;
 *  - on Darwin: the "hw.optional.*" sysctl values, plus the compile-time
 *    crypto flag for AES;
 *  - on Windows/ARM64: IsProcessorFeaturePresent().
 * In all other builds, and when getauxval(AT_HWCAP) returns 0, the features
 * enabled at compile time (__ARM_NEON, __ARM_FEATURE_CRC32,
 * __ARM_FEATURE_CRYPTO) are reported instead.
 */
static inline quint64 detectProcessorFeatures()
{
    quint64 features = 0;

#if QT_CONFIG(getauxval)
    unsigned long auxvHwCap = getauxval(AT_HWCAP);
    if (auxvHwCap != 0) {
#  if defined(Q_PROCESSOR_ARM_64)
        // For Aarch64:
        features |= CpuFeatureNEON; // NEON is always available
        if (auxvHwCap & HWCAP_CRC32)
            features |= CpuFeatureCRC32;
        if (auxvHwCap & HWCAP_AES)
            features |= CpuFeatureAES;
#  else
        // For ARM32:
        if (auxvHwCap & HWCAP_NEON)
            features |= CpuFeatureNEON;
        // CRC32 and AES are tested in the second capability word
        auxvHwCap = getauxval(AT_HWCAP2);
        if (auxvHwCap & HWCAP2_CRC32)
            features |= CpuFeatureCRC32;
        if (auxvHwCap & HWCAP2_AES)
            features |= CpuFeatureAES;
#  endif
        return features;
    }
    // fall back to compile-time flags if getauxval failed
#elif defined(Q_OS_DARWIN) && defined(Q_PROCESSOR_ARM)
    // sysctlbyname() takes the length as an in/out argument: it is
    // overwritten by every call. Reset the value and its length before each
    // query so that one query cannot influence the next.
    unsigned feature = 0;
    size_t len = sizeof(feature);
    if (sysctlbyname("hw.optional.neon", &feature, &len, nullptr, 0) == 0)
        features |= feature ? CpuFeatureNEON : 0;

    feature = 0;
    len = sizeof(feature);
    if (sysctlbyname("hw.optional.armv8_crc32", &feature, &len, nullptr, 0) == 0)
        features |= feature ? CpuFeatureCRC32 : 0;

    // There is currently no optional value for crypto/AES.
#if defined(__ARM_FEATURE_CRYPTO)
    features |= CpuFeatureAES;
#endif
    return features;
#elif defined(Q_OS_WIN) && defined(Q_PROCESSOR_ARM64)
    features |= CpuFeatureNEON;
    if (IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) != 0)
        features |= CpuFeatureCRC32;
    if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) != 0)
        features |= CpuFeatureAES;
    return features;
#endif

    // Compile-time fallback
#if defined(__ARM_NEON__) || defined(__ARM_NEON)
    features |= CpuFeatureNEON;
#endif
#if defined(__ARM_FEATURE_CRC32)
    features |= CpuFeatureCRC32;
#endif
#if defined(__ARM_FEATURE_CRYPTO)
    features |= CpuFeatureAES;
#endif

    return features;
}
|
|
|
|
#elif defined(Q_PROCESSOR_X86)
|
|
|
|
#ifdef Q_PROCESSOR_X86_32
|
|
# define PICreg "%%ebx"
|
|
#else
|
|
# define PICreg "%%rbx"
|
|
#endif
|
|
|
|
static bool checkRdrndWorks() noexcept;
|
|
|
|
/*
 * Returns the value CPUID leaf 0 reports in EAX, i.e. the highest basic
 * CPUID leaf that may be queried. Returns 0 when the CPUID instruction is
 * found to be unsupported or when no implementation exists for the compiler.
 */
static int maxBasicCpuidSupported()
{
#if defined(Q_CC_EMSCRIPTEN)
    return 6; // All features supported by Emscripten
#elif defined(Q_CC_GNU)
    qregisterint scratch;

#  if Q_PROCESSOR_X86 < 5
    // check if the CPUID instruction is supported: try to flip bit
    // 0x00200000 of the flags register and see whether the change sticks
    long flagsDiff;
    asm ("pushf\n"
         "pop %0\n"
         "mov %0, %1\n"
         "xor $0x00200000, %0\n"
         "push %0\n"
         "popf\n"
         "pushf\n"
         "pop %0\n"
         "xor %1, %0\n" // %eax is now 0 if CPUID is not supported
         : "=a" (flagsDiff), "=r" (scratch)
         );
    if (flagsDiff == 0)
        return 0;
#  endif

    // PICreg is swapped out around CPUID and restored right afterwards
    int maxLeaf;
    asm ("xchg " PICreg", %1\n"
         "cpuid\n"
         "xchg " PICreg", %1\n"
        : "=&a" (maxLeaf), "=&r" (scratch)
        : "0" (0)
        : "ecx", "edx");
    return maxLeaf;
#elif defined(Q_OS_WIN)
    // Use the __cpuid function; if the CPUID instruction isn't supported, it will return 0
    int regs[4];
    __cpuid(regs, 0);
    return regs[0];
#elif defined(Q_CC_GHS)
    unsigned int regs[4];
    __CPUID(0, regs);
    return regs[0];
#else
    return 0;
#endif
}
|
|
|
|
/*
 * Queries CPUID leaf 1 and stores the ECX and EDX results in the two output
 * parameters. When no CPUID implementation is available for the compiler,
 * both outputs are left untouched.
 */
static void cpuidFeatures01(uint &ecx, uint &edx)
{
#if defined(Q_CC_GNU) && !defined(Q_CC_EMSCRIPTEN)
    // scratch register that holds PICreg's value while CPUID runs
    qregisterint savedPicReg;
    asm ("xchg " PICreg", %2\n"
         "cpuid\n"
         "xchg " PICreg", %2\n"
        : "=&c" (ecx), "=&d" (edx), "=&r" (savedPicReg)
        : "a" (1));
#elif defined(Q_OS_WIN)
    int regs[4];
    __cpuid(regs, 1);
    ecx = regs[2];
    edx = regs[3];
#elif defined(Q_CC_GHS)
    unsigned int regs[4];
    __CPUID(1, regs);
    ecx = regs[2];
    edx = regs[3];
#else
    Q_UNUSED(ecx);
    Q_UNUSED(edx);
#endif
}
|
|
|
|
#ifdef Q_OS_WIN
|
|
// Overload of __cpuidex taking an __int64 as last argument: it performs no
// CPUID query and reports all four registers as zero.
// NOTE(review): presumably only selected when the toolchain lacks the real
// __cpuidex intrinsic (same pattern as the _xgetbv fallback) - confirm.
inline void __cpuidex(int info[4], int, __int64)
{
    for (int reg = 0; reg < 4; ++reg)
        info[reg] = 0;
}
|
|
#endif
|
|
|
|
/*
 * Queries CPUID leaf 7, sub-leaf 0, and stores the EBX, ECX and EDX results
 * in the three output parameters. When no CPUID implementation is available
 * for the compiler, the outputs are left untouched.
 */
static void cpuidFeatures07_00(uint &ebx, uint &ecx, uint &edx)
{
#if defined(Q_CC_GNU) && !defined(Q_CC_EMSCRIPTEN)
    // register-sized temporaries (in case it's 64-bit); ECX doubles as the
    // sub-leaf selector, hence it goes in as 0
    qregisteruint regB;
    qregisteruint regC = 0;
    qregisteruint regD = 0;
    asm ("xchg " PICreg", %0\n"
         "cpuid\n"
         "xchg " PICreg", %0\n"
        : "=&r" (regB), "+&c" (regC), "+&d" (regD)
        : "a" (7));
    ebx = regB;
    ecx = regC;
    edx = regD;
#elif defined(Q_OS_WIN)
    int regs[4];
    __cpuidex(regs, 7, 0);
    ebx = regs[1];
    ecx = regs[2];
    edx = regs[3];
#elif defined(Q_CC_GHS)
    unsigned int regs[4];
    __CPUIDEX(7, 0, regs);
    ebx = regs[1];
    ecx = regs[2];
    edx = regs[3];
#else
    Q_UNUSED(ebx);
    Q_UNUSED(ecx);
    Q_UNUSED(edx);
#endif
}
|
|
|
|
#if defined(Q_OS_WIN) && !(defined(Q_CC_GNU) || defined(Q_CC_GHS))
|
|
// fallback overload in case this intrinsic does not exist: unsigned __int64 _xgetbv(unsigned int);
|
|
inline quint64 _xgetbv(__int64) { return 0; }
|
|
#endif
|
|
/*
 * Executes XGETBV with `in` as the register selector and stores the low
 * 32 bits of the result in `eax` and the high 32 bits in `edx`. When no
 * implementation is available for the compiler, the outputs are left
 * untouched.
 */
static void xgetbv(uint in, uint &eax, uint &edx)
{
#if (defined(Q_CC_GNU) && !defined(Q_CC_EMSCRIPTEN)) || defined(Q_CC_GHS)
    asm (".byte 0x0F, 0x01, 0xD0" // xgetbv instruction
        : "=a" (eax), "=d" (edx)
        : "c" (in));
#elif defined(Q_OS_WIN)
    const quint64 value = _xgetbv(in);
    eax = value;            // low half
    edx = value >> 32;      // high half
#else
    Q_UNUSED(in);
    Q_UNUSED(eax);
    Q_UNUSED(edx);
#endif
}
|
|
|
|
// Flags from the XCR0 state register, followed by the flag combinations
// that are tested as a whole for SSE, AVX and AVX512.
enum XCR0Flags {
    X87             = 0x01,
    XMM0_15         = 0x02,
    YMM0_15Hi128    = 0x04,
    BNDRegs         = 0x08,
    BNDCSR          = 0x10,
    OpMask          = 0x20,
    ZMM0_15Hi256    = 0x40,
    ZMM16_31        = 0x80,

    SSEState        = XMM0_15,
    AVXState        = XMM0_15 | YMM0_15Hi128,
    AVX512State     = AVXState | OpMask | ZMM0_15Hi256 | ZMM16_31
};
|
|
|
|
/*
 * Returns `xcr0`, with the AVX512 state bits added on Darwin when the
 * kernel's capability word advertises AVX512F. On every other OS the value
 * is returned unchanged.
 */
static quint64 adjustedXcr0(quint64 xcr0)
{
    /*
     * Some OSes do not show in the XCR0 register that they are able to
     * context-switch the AVX512 state. They allocate the necessary context
     * switch space only the first time we execute an instruction that may
     * access the AVX512 state (requiring the EVEX prefix).
     *
     * The XFD (Extended Feature Disable) register deprecates this
     * behavior, but existing OSes cannot be changed.
     */
#ifdef Q_OS_DARWIN
    // from <machine/cpu_capabilities.h> in xnu
    // <https://github.com/apple/darwin-xnu/blob/xnu-4903.221.2/osfmk/i386/cpu_capabilities.h>
    constexpr quint64 kHasAVX512F = Q_UINT64_C(0x0000004000000000);
    constexpr quintptr commpage = sizeof(void *) > 4 ? Q_UINT64_C(0x00007fffffe00000) : 0xffff0000;
    constexpr quintptr cpu_capabilities64 = commpage + 0x10;

    // read the 64-bit capability word straight from its fixed address
    const quint64 capabilities = *reinterpret_cast<quint64 *>(cpu_capabilities64);
    if ((capabilities & kHasAVX512F) != 0)
        xcr0 |= AVX512State;
#endif

    return xcr0;
}
|
|
|
|
/*
 * Detects the x86 CPU features and returns them as a mask of CpuFeature*
 * bits.
 *
 * The raw CPUID leaf 1 and leaf 7 registers are translated through the
 * x86_locators table. Afterwards the AVX and AVX512 features are removed
 * when XCR0 does not show the corresponding register state as enabled, and
 * RDRND/RDSEED are removed when checkRdrndWorks() reports a failure.
 */
static quint64 detectProcessorFeatures()
{
    static const quint64 AllAVX2 = CpuFeatureAVX2 | AllAVX512;
    static const quint64 AllAVX = CpuFeatureAVX | AllAVX2;

    const int maxLeaf = maxBasicCpuidSupported();
#if Q_PROCESSOR_X86 < 5
    if (maxLeaf < 1)
        return 0;
#else
    Q_ASSERT(maxLeaf >= 1);
#endif

    // raw register contents, zero for every leaf that is not queried
    uint cpuidRegs[X86CpuidMaxLeaf] = {};
    cpuidFeatures01(cpuidRegs[Leaf1ECX], cpuidRegs[Leaf1EDX]);
    if (maxLeaf >= 7)
        cpuidFeatures07_00(cpuidRegs[Leaf7_0EBX], cpuidRegs[Leaf7_0ECX], cpuidRegs[Leaf7_0EDX]);

    // populate our feature list: locator i encodes (register index * 32 +
    // bit number) and maps to feature bit i + 1
    quint64 features = 0;
    const uint locatorCount = sizeof(x86_locators) / sizeof(x86_locators[0]);
    for (uint i = 0; i < locatorCount; ++i) {
        const auto locator = x86_locators[i];
        const uint registerIndex = locator / 32;
        const uint registerBit = 1U << (locator % 32);
        if (cpuidRegs[registerIndex] & registerBit)
            features |= Q_UINT64_C(1) << (i + 1);
    }

    // now check the AVX state
    quint64 xcr0 = 0;
    const bool xgetbvEnabled = (cpuidRegs[Leaf1ECX] & (1u << 27)) != 0;
    if (xgetbvEnabled) {
        uint xgetbvA = 0, xgetbvD = 0;
        xgetbv(0, xgetbvA, xgetbvD);

        xcr0 = xgetbvA;
        if (sizeof(XCR0Flags) > sizeof(xgetbvA))
            xcr0 |= quint64(xgetbvD) << 32;
        xcr0 = adjustedXcr0(xcr0);
    }

    const bool ymmStateEnabled = (xcr0 & AVXState) == AVXState;
    const bool zmmStateEnabled = (xcr0 & AVX512State) == AVX512State;
    if (!ymmStateEnabled) {
        // support for YMM registers is disabled, disable all AVX
        features &= ~AllAVX;
    } else if (!zmmStateEnabled) {
        // support for ZMM registers or mask registers is disabled, disable all AVX512
        features &= ~AllAVX512;
    }

    // a failing RDRND takes RDSEED down with it
    if ((features & CpuFeatureRDRND) && !checkRdrndWorks())
        features &= ~(CpuFeatureRDRND | CpuFeatureRDSEED);

    return features;
}
|
|
|
|
#elif defined(Q_PROCESSOR_MIPS_32)
|
|
|
|
#if defined(Q_OS_LINUX)
|
|
//
// Do not use QByteArray: it could use SIMD instructions itself at
// some point, thus creating a recursive dependency. Instead, use a
// QSimpleBuffer, which has the bare minimum needed to use memory
// dynamically and read lines from /proc/cpuinfo of arbitrary sizes.
//
// The buffer owns its storage (released in the destructor) and therefore
// cannot be copied. Running out of memory terminates the process through
// abort() instead of continuing with a null buffer.
//
struct QSimpleBuffer
{
    static const int chunk_size = 256;  // allocation granularity, in bytes
    char *data;         // storage, nullptr until the first allocation
    unsigned alloc;     // number of bytes allocated at data
    unsigned size;      // number of bytes in use

    QSimpleBuffer() : data(nullptr), alloc(0), size(0) { }
    ~QSimpleBuffer() { ::free(data); }

    // copying would make two objects free() the same storage
    QSimpleBuffer(const QSimpleBuffer &) = delete;
    QSimpleBuffer &operator=(const QSimpleBuffer &) = delete;

    // Sets the number of bytes in use to newsize. The storage grows in
    // multiples of chunk_size when needed and is never shrunk.
    void resize(unsigned newsize)
    {
        if (newsize > alloc) {
            unsigned newalloc = chunk_size * ((newsize / chunk_size) + 1);
            if (newalloc < newsize)
                newalloc = newsize;
            if (newalloc != alloc) {
                // keep the old pointer until realloc() is known to have
                // succeeded: on failure it leaves the old block in place
                char *grown = static_cast<char *>(::realloc(data, newalloc));
                if (!grown)
                    ::abort();
                data = grown;
                alloc = newalloc;
            }
        }
        size = newsize;
    }

    // Appends the first appendsize bytes of other to this buffer.
    void append(const QSimpleBuffer &other, unsigned appendsize)
    {
        unsigned oldsize = size;
        resize(oldsize + appendsize);
        // memcpy() must not be handed a null pointer, not even for 0 bytes
        if (appendsize)
            ::memcpy(data + oldsize, other.data, appendsize);
    }

    // Removes the first amount bytes; removes everything if amount >= size.
    void popleft(unsigned amount)
    {
        if (amount >= size)
            return resize(0);
        size -= amount;
        ::memmove(data, data + amount, size);
    }

    // Returns the contents as a NUL-terminated string. The terminator is
    // written right after the bytes in use and is not counted in size.
    char *cString()
    {
        if (size >= alloc) {
            // make room for the terminator without changing the contents
            unsigned used = size;
            resize(used + 1);
            size = used;
        }
        data[size] = '\0';
        return data;
    }
};
|
|
|
|
//
|
|
// Uses a scratch "buffer" (which must be used for all reads done in the
|
|
// same file descriptor) to read chunks of data from a file, to read
|
|
// one line at a time. Lines include the trailing newline character ('\n').
|
|
// On EOF, line.size is zero.
|
|
//
|
|
static void bufReadLine(int fd, QSimpleBuffer &line, QSimpleBuffer &buffer)
|
|
{
|
|
for (;;) {
|
|
char *newline = static_cast<char *>(::memchr(buffer.data, '\n', buffer.size));
|
|
if (newline) {
|
|
unsigned piece_size = newline - buffer.data + 1;
|
|
line.append(buffer, piece_size);
|
|
buffer.popleft(piece_size);
|
|
line.resize(line.size - 1);
|
|
return;
|
|
}
|
|
if (buffer.size + QSimpleBuffer::chunk_size > buffer.alloc) {
|
|
int oldsize = buffer.size;
|
|
buffer.resize(buffer.size + QSimpleBuffer::chunk_size);
|
|
buffer.size = oldsize;
|
|
}
|
|
ssize_t read_bytes =
|
|
::qt_safe_read(fd, buffer.data + buffer.size, QSimpleBuffer::chunk_size);
|
|
if (read_bytes > 0)
|
|
buffer.size += read_bytes;
|
|
else
|
|
return;
|
|
}
|
|
}
|
|
|
|
//
|
|
// Checks if any line with a given prefix from /proc/cpuinfo contains
|
|
// a certain string, surrounded by spaces.
|
|
//
|
|
static bool procCpuinfoContains(const char *prefix, const char *string)
|
|
{
|
|
int cpuinfo_fd = ::qt_safe_open("/proc/cpuinfo", O_RDONLY);
|
|
if (cpuinfo_fd == -1)
|
|
return false;
|
|
|
|
unsigned string_len = ::strlen(string);
|
|
unsigned prefix_len = ::strlen(prefix);
|
|
QSimpleBuffer line, buffer;
|
|
bool present = false;
|
|
do {
|
|
line.resize(0);
|
|
bufReadLine(cpuinfo_fd, line, buffer);
|
|
char *colon = static_cast<char *>(::memchr(line.data, ':', line.size));
|
|
if (colon && line.size > prefix_len + string_len) {
|
|
if (!::strncmp(prefix, line.data, prefix_len)) {
|
|
// prefix matches, next character must be ':' or space
|
|
if (line.data[prefix_len] == ':' || ::isspace(line.data[prefix_len])) {
|
|
// Does it contain the string?
|
|
char *found = ::strstr(line.cString(), string);
|
|
if (found && ::isspace(found[-1]) &&
|
|
(::isspace(found[string_len]) || found[string_len] == '\0')) {
|
|
present = true;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
} while (line.size);
|
|
|
|
::qt_safe_close(cpuinfo_fd);
|
|
return present;
|
|
}
|
|
#endif
|
|
|
|
/*
 * Detects the MIPS DSP extensions and returns them as a mask made of
 * CpuFeatureDSP and CpuFeatureDSPR2.
 *
 * Features enabled at compile time (__mips_dsp, __mips_dsp_rev) are taken
 * as present; what the compiler flags do not settle is looked up in
 * /proc/cpuinfo on Linux.
 */
static inline quint64 detectProcessorFeatures()
{
    // NOTE: MIPS 74K cores are the only ones supporting DSPr2.
    quint64 detected = 0;

#if defined __mips_dsp
    detected |= CpuFeatureDSP;
#  if defined __mips_dsp_rev && __mips_dsp_rev >= 2
    detected |= CpuFeatureDSPR2;
#  elif defined(Q_OS_LINUX)
    const bool is74K = procCpuinfoContains("cpu model", "MIPS 74Kc")
            || procCpuinfoContains("cpu model", "MIPS 74Kf");
    if (is74K)
        detected |= CpuFeatureDSPR2;
#  endif
#elif defined(Q_OS_LINUX)
    // without DSP there is no point in looking for DSPr2
    if (!procCpuinfoContains("ASEs implemented", "dsp"))
        return detected;

    detected |= CpuFeatureDSP;
    const bool is74K = procCpuinfoContains("cpu model", "MIPS 74Kc")
            || procCpuinfoContains("cpu model", "MIPS 74Kf");
    if (is74K)
        detected |= CpuFeatureDSPR2;
#endif

    return detected;
}
|
|
|
|
#else
|
|
static inline uint detectProcessorFeatures()
|
|
{
|
|
return 0;
|
|
}
|
|
#endif
|
|
|
|
static const int features_count = (sizeof features_indices) / (sizeof features_indices[0]);
|
|
|
|
// record what CPU features were enabled by default in this Qt build
|
|
static const quint64 minFeature = qCompilerCpuFeatures;
|
|
|
|
#ifdef Q_ATOMIC_INT64_IS_SUPPORTED
|
|
Q_CORE_EXPORT QBasicAtomicInteger<quint64> qt_cpu_features[1] = { Q_BASIC_ATOMIC_INITIALIZER(0) };
|
|
#else
|
|
Q_CORE_EXPORT QBasicAtomicInteger<unsigned> qt_cpu_features[2] = { Q_BASIC_ATOMIC_INITIALIZER(0), Q_BASIC_ATOMIC_INITIALIZER(0) };
|
|
#endif
|
|
|
|
/*
 * Detects the CPU features, stores them in qt_cpu_features (together with
 * the QSimdInitialized marker) and returns them.
 *
 * Features named in the QT_NO_CPU_FEATURE environment variable are removed
 * from the result. If a feature that was enabled by default in this Qt build
 * is then missing, the list of missing features is printed to stderr and the
 * function ends in qFatal(); this check is skipped when running on Valgrind.
 */
quint64 qDetectCpuFeatures()
{
    auto minFeatureTest = minFeature;
#if defined(Q_OS_LINUX) && defined(Q_PROCESSOR_ARM_64)
    // Yocto hard-codes CRC32+AES on. Since they are unlikely to be used
    // automatically by compilers, we can just add runtime check.
    minFeatureTest &= ~(CpuFeatureAES|CpuFeatureCRC32);
#endif
    quint64 f = detectProcessorFeatures();
    QByteArray disable = qgetenv("QT_NO_CPU_FEATURE");
    if (!disable.isEmpty()) {
        // the names in features_string start with a space, so the first
        // name in the variable needs one too
        disable.prepend(' ');
        for (int i = 0; i < features_count; ++i) {
            const QByteArray name(features_string + features_indices[i]);

            // Only whole names count: a hit that is directly followed by
            // further name characters belongs to a different, longer
            // feature name (presumably e.g. "avx" vs. "avx2" in the x86
            // table) and must not disable this one.
            for (auto pos = disable.indexOf(name); pos >= 0; pos = disable.indexOf(name, pos + 1)) {
                const auto after = pos + name.size();
                const char next = after < disable.size() ? disable.at(after) : '\0';
                const bool nameContinues = (next >= 'a' && next <= 'z')
                        || (next >= 'A' && next <= 'Z')
                        || (next >= '0' && next <= '9')
                        || next == '.' || next == '_';
                if (!nameContinues) {
                    f &= ~(Q_UINT64_C(1) << i);
                    break;
                }
            }
        }
    }

#ifdef RUNNING_ON_VALGRIND
    bool runningOnValgrind = RUNNING_ON_VALGRIND;
#else
    bool runningOnValgrind = false;
#endif
    if (Q_UNLIKELY(!runningOnValgrind && minFeatureTest != 0 && (f & minFeatureTest) != minFeatureTest)) {
        quint64 missing = minFeatureTest & ~f;
        fprintf(stderr, "Incompatible processor. This Qt build requires the following features:\n   ");
        for (int i = 0; i < features_count; ++i) {
            if (missing & (Q_UINT64_C(1) << i))
                fprintf(stderr, "%s", features_string + features_indices[i]);
        }
        fprintf(stderr, "\n");
        fflush(stderr);
        // the fatal message names the lowest missing feature bit
        qFatal("Aborted. Incompatible processor: missing feature 0x%llx -%s.", missing,
               features_string + features_indices[qCountTrailingZeroBits(missing)]);
    }

    qt_cpu_features[0].storeRelaxed(f | quint32(QSimdInitialized));
#ifndef Q_ATOMIC_INT64_IS_SUPPORTED
    // without 64-bit atomics the upper half lives in the second element
    qt_cpu_features[1].storeRelaxed(f >> 32);
#endif
    return f;
}
|
|
|
|
void qDumpCPUFeatures()
|
|
{
|
|
quint64 features = qCpuFeatures() & ~quint64(QSimdInitialized);
|
|
printf("Processor features: ");
|
|
for (int i = 0; i < features_count; ++i) {
|
|
if (features & (Q_UINT64_C(1) << i))
|
|
printf("%s%s", features_string + features_indices[i],
|
|
minFeature & (Q_UINT64_C(1) << i) ? "[required]" : "");
|
|
}
|
|
if ((features = (qCompilerCpuFeatures & ~features))) {
|
|
printf("\n!!!!!!!!!!!!!!!!!!!!\n!!! Missing required features:");
|
|
for (int i = 0; i < features_count; ++i) {
|
|
if (features & (Q_UINT64_C(1) << i))
|
|
printf("%s", features_string + features_indices[i]);
|
|
}
|
|
printf("\n!!! Applications will likely crash with \"Invalid Instruction\"\n!!!!!!!!!!!!!!!!!!!!");
|
|
}
|
|
puts("");
|
|
}
|
|
|
|
#if defined(Q_PROCESSOR_X86) && QT_COMPILER_SUPPORTS_HERE(RDRND)
|
|
|
|
# ifdef Q_PROCESSOR_X86_64
|
|
# define _rdrandXX_step _rdrand64_step
|
|
# define _rdseedXX_step _rdseed64_step
|
|
# else
|
|
# define _rdrandXX_step _rdrand32_step
|
|
# define _rdseedXX_step _rdseed32_step
|
|
# endif
|
|
|
|
// The parameter to _rdrand64_step & _rdseed64_step is unsigned long long for
|
|
// Clang and GCC but unsigned __int64 for MSVC and ICC, which is unsigned long
|
|
// long on Windows, but unsigned long on Linux.
|
|
namespace {
|
|
template <typename F> struct ExtractParameter;
|
|
template <typename T> struct ExtractParameter<int (T *)> { using Type = T; };
|
|
using randuint = ExtractParameter<decltype(_rdrandXX_step)>::Type;
|
|
}
|
|
|
|
# if QT_COMPILER_SUPPORTS_HERE(RDSEED)
|
|
/*
 * Fills [ptr, end) with RDSEED output, one randuint at a time followed by
 * at most one 32-bit value for the remainder. Stops at the first failing
 * instruction. Returns the position following the last value written.
 */
static QT_FUNCTION_TARGET(RDSEED) unsigned *qt_random_rdseed(unsigned *ptr, unsigned *end) noexcept
{
    // Unlike for the RDRAND code below, the Intel whitepaper describing the
    // use of the RDSEED instruction indicates we should not retry in a loop.
    // If the independent bit generator used by RDSEED is out of entropy, it
    // may take time to replenish.
    // https://software.intel.com/en-us/articles/intel-digital-random-number-generator-drng-software-implementation-guide
    const auto wordsPerStep = sizeof(randuint) / sizeof(unsigned);

    while (ptr + wordsPerStep <= end) {
        if (_rdseedXX_step(reinterpret_cast<randuint *>(ptr)) == 0)
            return ptr;
        ptr += wordsPerStep;
    }

    // a single 32-bit slot may remain when randuint is wider than unsigned
    if (sizeof(unsigned) != sizeof(randuint) && ptr != end) {
        if (_rdseed32_step(ptr) != 0)
            ++ptr;
    }

    return ptr;
}
|
|
# else
|
|
// Variant used when RDSEED code cannot be compiled here: nothing is
// written and the start of the range is returned unchanged.
static unsigned *qt_random_rdseed(unsigned *ptr, unsigned * /* end */)
{
    return ptr;
}
|
|
# endif
|
|
|
|
/*
 * Fills [ptr, end) with RDRAND output, one randuint at a time followed by
 * at most one 32-bit value for the remainder. Failing instructions are
 * retried; the retry budget of 10 is shared by the whole call. Returns the
 * position following the last value written.
 */
static QT_FUNCTION_TARGET(RDRND) unsigned *qt_random_rdrnd(unsigned *ptr, unsigned *end) noexcept
{
    const auto wordsPerStep = sizeof(randuint) / sizeof(unsigned);
    int retries = 10;

    while (ptr + wordsPerStep <= end) {
        if (_rdrandXX_step(reinterpret_cast<randuint *>(ptr))) {
            ptr += wordsPerStep;
        } else if (--retries == 0) {
            return ptr;
        }
    }

    // a single 32-bit slot may remain when randuint is wider than unsigned
    while (sizeof(unsigned) != sizeof(randuint) && ptr != end) {
        if (_rdrand32_step(ptr)) {
            ++ptr;
            break;
        }
        if (--retries == 0)
            break;
    }

    return ptr;
}
|
|
|
|
/*
 * Returns true if the RDRND instruction appears to produce usable output.
 *
 * Some AMD CPUs (e.g. AMD A4-6250J and AMD Ryzen 3000-series) have a
 * failing random generation instruction, which always returns
 * 0xffffffff, even when generation was "successful".
 *
 * To catch that, a small buffer is filled and the values are compared: if
 * they are all equal, the generator is most likely a failing one and should
 * be disabled completely.
 *
 * https://bugreports.qt.io/browse/QTBUG-69423
 */
static QT_FUNCTION_TARGET(RDRND) Q_DECL_COLD_FUNCTION bool checkRdrndWorks() noexcept
{
    constexpr qsizetype TestBufferSize = 4;
    unsigned testBuffer[TestBufferSize] = {};
    unsigned *const bufferEnd = testBuffer + TestBufferSize;

    unsigned *const filledEnd = qt_random_rdrnd(testBuffer, bufferEnd);
    if (filledEnd < testBuffer + 3) {
        // Random generation didn't produce enough data for us to make a
        // determination whether it's working or not. Assume it isn't, but
        // don't print a warning.
        return false;
    }

    // Check the results for equality; the fourth value only takes part in
    // the comparison if it was actually generated
    const bool firstThreeEqual = testBuffer[0] == testBuffer[1]
            && testBuffer[0] == testBuffer[2];
    const bool fourthEqualOrAbsent = filledEnd < bufferEnd
            || testBuffer[0] == testBuffer[3];
    if (!(firstThreeEqual && fourthEqualOrAbsent)) {
        // We're good
        return true;
    }

    fprintf(stderr, "WARNING: CPU random generator seem to be failing, "
                    "disabling hardware random number generation\n"
                    "WARNING: RDRND generated:");
    for (unsigned *value = testBuffer; value < filledEnd; ++value)
        fprintf(stderr, " 0x%x", *value);
    fprintf(stderr, "\n");
    return false;
}
|
|
|
|
/*
 * Fills `buffer`, treated as an array of `count` unsigned values, with
 * hardware random numbers: RDSEED output first when the CPU has that
 * feature, RDRND output for whatever remains. Returns the number of
 * unsigned values that were written.
 */
QT_FUNCTION_TARGET(RDRND) qsizetype qRandomCpu(void *buffer, qsizetype count) noexcept
{
    unsigned *const begin = reinterpret_cast<unsigned *>(buffer);
    unsigned *const end = begin + count;
    unsigned *cursor = begin;

    if (qCpuHasFeature(RDSEED))
        cursor = qt_random_rdseed(cursor, end);

    // fill the buffer with RDRND if RDSEED didn't
    cursor = qt_random_rdrnd(cursor, end);
    return cursor - begin;
}
|
|
#elif defined(Q_PROCESSOR_X86) && !defined(Q_OS_NACL) && !defined(Q_PROCESSOR_ARM)
|
|
// Variant used when RDRND code cannot be compiled here: the hardware
// generator is always reported as not working.
static bool checkRdrndWorks() noexcept
{
    return false;
}
|
|
#endif // Q_PROCESSOR_X86 && RDRND
|
|
|
|
QT_END_NAMESPACE
|