qtbase/src/corelib/global/qsimd.cpp
Allan Sandfeld Jensen fe6dc9dc85 Add runtime ARM64 AES check
Adds runtime CPU detection for Windows and macOS, and switches feature
detection of AES to runtime like for x86,

So far only on ARM64, since gcc doesn't do function versioning on ARM32,
but clang can, so it could be added later.

Change-Id: Ibe5d60f48cdae3e366a8ecd6263534ba2b09b131
Reviewed-by: Tor Arne Vestbø <tor.arne.vestbo@qt.io>
Reviewed-by: Alexandru Croitor <alexandru.croitor@qt.io>
2021-05-20 18:31:05 +00:00

784 lines
24 KiB
C++

/****************************************************************************
**
** Copyright (C) 2016 The Qt Company Ltd.
** Copyright (C) 2019 Intel Corporation.
** Contact: https://www.qt.io/licensing/
**
** This file is part of the QtCore module of the Qt Toolkit.
**
** $QT_BEGIN_LICENSE:LGPL$
** Commercial License Usage
** Licensees holding valid commercial Qt licenses may use this file in
** accordance with the commercial license agreement provided with the
** Software or, alternatively, in accordance with the terms contained in
** a written agreement between you and The Qt Company. For licensing terms
** and conditions see https://www.qt.io/terms-conditions. For further
** information use the contact form at https://www.qt.io/contact-us.
**
** GNU Lesser General Public License Usage
** Alternatively, this file may be used under the terms of the GNU Lesser
** General Public License version 3 as published by the Free Software
** Foundation and appearing in the file LICENSE.LGPL3 included in the
** packaging of this file. Please review the following information to
** ensure the GNU Lesser General Public License version 3 requirements
** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
**
** GNU General Public License Usage
** Alternatively, this file may be used under the terms of the GNU
** General Public License version 2.0 or (at your option) the GNU General
** Public license version 3 or any later version approved by the KDE Free
** Qt Foundation. The licenses are as published by the Free Software
** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
** included in the packaging of this file. Please review the following
** information to ensure the GNU General Public License requirements will
** be met: https://www.gnu.org/licenses/gpl-2.0.html and
** https://www.gnu.org/licenses/gpl-3.0.html.
**
** $QT_END_LICENSE$
**
****************************************************************************/
// we need ICC to define the prototype for _rdseed64_step
#define __INTEL_COMPILER_USE_INTRINSIC_PROTOTYPES
#include "qsimd_p.h"
#include "qalgorithms.h"
#include <QByteArray>
#include <stdio.h>
#ifdef Q_OS_LINUX
# include "../testlib/3rdparty/valgrind_p.h"
#endif
#if defined(Q_OS_WIN)
# if !defined(Q_CC_GNU)
# include <intrin.h>
# endif
# if defined(Q_PROCESSOR_ARM_64)
# include "qt_windows.h"
# include <processthreadsapi.h>
# endif
#elif defined(Q_OS_LINUX) && defined(Q_PROCESSOR_MIPS_32)
# include "private/qcore_unix_p.h"
#elif QT_CONFIG(getauxval) && defined(Q_PROCESSOR_ARM)
# include <sys/auxv.h>
// the kernel header definitions for HWCAP_*
// (the ones we need/may need anyway)
// copied from <asm/hwcap.h> (ARM)
#define HWCAP_NEON 4096
// copied from <asm/hwcap.h> (ARM):
#define HWCAP2_AES (1 << 0)
#define HWCAP2_CRC32 (1 << 4)
// copied from <asm/hwcap.h> (Aarch64)
#define HWCAP_AES (1 << 3)
#define HWCAP_CRC32 (1 << 7)
// copied from <linux/auxvec.h>
#define AT_HWCAP 16 /* arch dependent hints at CPU capabilities */
#define AT_HWCAP2 26 /* extension of AT_HWCAP */
#elif defined(Q_CC_GHS)
# include <INTEGRITY_types.h>
#elif defined(Q_OS_DARWIN) && defined(Q_PROCESSOR_ARM)
# include <sys/sysctl.h>
#endif
QT_BEGIN_NAMESPACE
// Feature-name tables used when parsing QT_NO_CPU_FEATURE and when printing
// feature lists. features_string is a sequence of NUL-terminated names, each
// stored with a leading space; features_indices[i] is the offset inside
// features_string of the name that belongs to feature bit i. Entry 0 points at
// the leading "\0", i.e. an empty name.
#if defined(Q_PROCESSOR_ARM)
/* Data:
neon
crc32
aes
*/
static const char features_string[] =
"\0"
" neon\0"
" crc32\0"
" aes\0";
// offsets of "", " neon", " crc32" and " aes" in the string above
static const int features_indices[] = { 0, 1, 7, 14 };
#elif defined(Q_PROCESSOR_MIPS)
/* Data:
dsp
dspr2
*/
static const char features_string[] =
"\0"
" dsp\0"
" dspr2\0";
// offsets of "", " dsp" and " dspr2" in the string above
static const int features_indices[] = {
0, 1, 6
};
#elif defined(Q_PROCESSOR_X86)
# include "qsimd_x86.cpp" // generated by util/x86simdgen
#else
// No known features on this architecture: both tables are empty.
static const char features_string[] = "";
static const int features_indices[] = { };
#endif
// end generated
#if defined (Q_OS_NACL)
// Native Client build: no run-time detection is performed, so the reported
// feature set is always empty.
static inline uint detectProcessorFeatures()
{
    const uint noFeatures = 0;
    return noFeatures;
}
#elif defined(Q_PROCESSOR_ARM)
/*
 * Returns the set of CpuFeature* bits (NEON, CRC32, AES) available on this
 * ARM processor.
 *
 * Depending on the platform, the information comes from:
 *  - getauxval(AT_HWCAP / AT_HWCAP2), when Qt was configured with getauxval;
 *  - sysctlbyname("hw.optional.*") on Darwin;
 *  - IsProcessorFeaturePresent() on 64-bit ARM Windows;
 *  - otherwise (or if getauxval() returned 0) the compiler's predefined
 *    __ARM_* macros, i.e. what this translation unit was compiled for.
 */
static inline quint64 detectProcessorFeatures()
{
    quint64 features = 0;

#if QT_CONFIG(getauxval)
    const unsigned long auxvHwCap = getauxval(AT_HWCAP);
    if (auxvHwCap != 0) {
#  if defined(Q_PROCESSOR_ARM_64)
        // For Aarch64:
        features |= CpuFeatureNEON; // NEON is always available
        if (auxvHwCap & HWCAP_CRC32)
            features |= CpuFeatureCRC32;
        if (auxvHwCap & HWCAP_AES)
            features |= CpuFeatureAES;
#  else
        // For ARM32: NEON is reported in AT_HWCAP, CRC32 and AES in AT_HWCAP2
        if (auxvHwCap & HWCAP_NEON)
            features |= CpuFeatureNEON;
        const unsigned long auxvHwCap2 = getauxval(AT_HWCAP2);
        if (auxvHwCap2 & HWCAP2_CRC32)
            features |= CpuFeatureCRC32;
        if (auxvHwCap2 & HWCAP2_AES)
            features |= CpuFeatureAES;
#  endif
        return features;
    }
    // fall back to compile-time flags if getauxval failed
#elif defined(Q_OS_DARWIN) && defined(Q_PROCESSOR_ARM)
    // Zero-initialized so that a short or failed read can never leave
    // indeterminate bits behind; len is in/out, so it is reset before each call.
    unsigned feature = 0;
    size_t len = sizeof(feature);
    if (sysctlbyname("hw.optional.neon", &feature, &len, nullptr, 0) == 0)
        features |= feature ? CpuFeatureNEON : 0;
    feature = 0;
    len = sizeof(feature);
    if (sysctlbyname("hw.optional.armv8_crc32", &feature, &len, nullptr, 0) == 0)
        features |= feature ? CpuFeatureCRC32 : 0;
    // There is currently no optional value for crypto/AES.
#  if defined(__ARM_FEATURE_CRYPTO)
    features |= CpuFeatureAES;
#  endif
    return features;
#elif defined(Q_OS_WIN) && defined(Q_PROCESSOR_ARM_64)
    // The guard must use Q_PROCESSOR_ARM_64 (with the underscore), the
    // spelling used everywhere else in this file; with the previous
    // "Q_PROCESSOR_ARM64" spelling this branch was never compiled.
    features |= CpuFeatureNEON;
    if (IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) != 0)
        features |= CpuFeatureCRC32;
    if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) != 0)
        features |= CpuFeatureAES;
    return features;
#endif

    // Compile-time fallback: report what the compiler was told to target.
#if defined(__ARM_NEON__) || defined(__ARM_NEON)
    features |= CpuFeatureNEON;
#endif
#if defined(__ARM_FEATURE_CRC32)
    features |= CpuFeatureCRC32;
#endif
#if defined(__ARM_FEATURE_CRYPTO)
    features |= CpuFeatureAES;
#endif
    return features;
}
#elif defined(Q_PROCESSOR_X86)
// PICreg is the (asm-escaped) name of the base register that the CPUID
// wrappers below swap with a scratch register around each "cpuid" instruction:
// ebx on 32-bit builds, rbx otherwise.
// NOTE(review): judging by the macro name this is done because the register may
// be reserved for position-independent code — confirm.
#ifdef Q_PROCESSOR_X86_32
# define PICreg "%%ebx"
#else
# define PICreg "%%rbx"
#endif
// Forward declaration; the definition is near the end of this file.
static bool checkRdrndWorks() noexcept;
// Executes CPUID with EAX = 0 and returns the EAX it produces, which callers
// treat as the highest basic CPUID leaf that may be queried. Returns 0 when
// CPUID is unavailable or when no implementation exists for this compiler.
static int maxBasicCpuidSupported()
{
#if defined(Q_CC_EMSCRIPTEN)
return 6; // All features supported by Emscripten
#elif defined(Q_CC_GNU)
qregisterint tmp1;
# if Q_PROCESSOR_X86 < 5
// check if the CPUID instruction is supported
// (tries to toggle bit 0x00200000 of the flags register and checks whether
// the change sticks)
long cpuid_supported;
asm ("pushf\n"
"pop %0\n"
"mov %0, %1\n"
"xor $0x00200000, %0\n"
"push %0\n"
"popf\n"
"pushf\n"
"pop %0\n"
"xor %1, %0\n" // %eax is now 0 if CPUID is not supported
: "=a" (cpuid_supported), "=r" (tmp1)
);
if (!cpuid_supported)
return 0;
# endif
int result;
// PICreg is swapped into tmp1 before "cpuid" and swapped back afterwards, so
// its value is unchanged once the asm statement finishes.
asm ("xchg " PICreg", %1\n"
"cpuid\n"
"xchg " PICreg", %1\n"
: "=&a" (result), "=&r" (tmp1)
: "0" (0)
: "ecx", "edx");
return result;
#elif defined(Q_OS_WIN)
// Use the __cpuid function; if the CPUID instruction isn't supported, it will return 0
int info[4];
__cpuid(info, 0);
return info[0];
#elif defined(Q_CC_GHS)
unsigned int info[4];
__CPUID(0, info);
return info[0];
#else
return 0;
#endif
}
// Executes CPUID leaf 1 and stores the resulting ECX and EDX registers in
// "ecx" and "edx". When no implementation exists for this compiler, the
// outputs are left untouched.
static void cpuidFeatures01(uint &ecx, uint &edx)
{
#if defined(Q_CC_GNU) && !defined(Q_CC_EMSCRIPTEN)
qregisterint tmp1;
// PICreg is swapped with tmp1 around "cpuid" and restored afterwards
asm ("xchg " PICreg", %2\n"
"cpuid\n"
"xchg " PICreg", %2\n"
: "=&c" (ecx), "=&d" (edx), "=&r" (tmp1)
: "a" (1));
#elif defined(Q_OS_WIN)
// info[] receives EAX, EBX, ECX, EDX in that order
int info[4];
__cpuid(info, 1);
ecx = info[2];
edx = info[3];
#elif defined(Q_CC_GHS)
unsigned int info[4];
__CPUID(1, info);
ecx = info[2];
edx = info[3];
#else
Q_UNUSED(ecx);
Q_UNUSED(edx);
#endif
}
#ifdef Q_OS_WIN
// Stand-in that takes an __int64 as its last parameter and zero-fills all four
// result words.
// NOTE(review): presumably a fallback overload for toolchains that lack the
// real __cpuidex intrinsic, mirroring the _xgetbv overload later in this
// file — confirm.
inline void __cpuidex(int info[4], int, __int64) { memset(info, 0, 4*sizeof(int));}
#endif
// Executes CPUID leaf 7 with sub-leaf (ECX) 0 and stores the resulting EBX,
// ECX and EDX registers in the three output parameters. When no
// implementation exists for this compiler, the outputs are left untouched.
static void cpuidFeatures07_00(uint &ebx, uint &ecx, uint &edx)
{
#if defined(Q_CC_GNU) && !defined(Q_CC_EMSCRIPTEN)
qregisteruint rbx; // in case it's 64-bit
qregisteruint rcx = 0; // sub-leaf 0 on input, ECX result on output
qregisteruint rdx = 0;
// PICreg is swapped with the scratch variable around "cpuid"; after the second
// xchg the scratch variable holds CPUID's EBX and PICreg is restored.
asm ("xchg " PICreg", %0\n"
"cpuid\n"
"xchg " PICreg", %0\n"
: "=&r" (rbx), "+&c" (rcx), "+&d" (rdx)
: "a" (7));
ebx = rbx;
ecx = rcx;
edx = rdx;
#elif defined(Q_OS_WIN)
// info[] receives EAX, EBX, ECX, EDX in that order
int info[4];
__cpuidex(info, 7, 0);
ebx = info[1];
ecx = info[2];
edx = info[3];
#elif defined(Q_CC_GHS)
unsigned int info[4];
__CPUIDEX(7, 0, info);
ebx = info[1];
ecx = info[2];
edx = info[3];
#else
Q_UNUSED(ebx);
Q_UNUSED(ecx);
Q_UNUSED(edx);
#endif
}
#if defined(Q_OS_WIN) && !(defined(Q_CC_GNU) || defined(Q_CC_GHS))
// fallback overload in case this intrinsic does not exist: unsigned __int64 _xgetbv(unsigned int);
// It takes an __int64 (rather than unsigned int) argument and always reports 0.
inline quint64 _xgetbv(__int64) { return 0; }
#endif
// Executes XGETBV for the register selected by "in" (passed in ECX) and
// returns the low half of the result in "eax" and the high half in "edx".
// When no implementation exists for this compiler, the outputs are left
// untouched.
static void xgetbv(uint in, uint &eax, uint &edx)
{
#if (defined(Q_CC_GNU) && !defined(Q_CC_EMSCRIPTEN)) || defined(Q_CC_GHS)
// the instruction is emitted as raw bytes instead of by mnemonic
asm (".byte 0x0F, 0x01, 0xD0" // xgetbv instruction
: "=a" (eax), "=d" (edx)
: "c" (in));
#elif defined(Q_OS_WIN)
// split the 64-bit intrinsic result into its two 32-bit halves
quint64 result = _xgetbv(in);
eax = result;
edx = result >> 32;
#else
Q_UNUSED(in);
Q_UNUSED(eax);
Q_UNUSED(edx);
#endif
}
// Individual bits of the XCR0 state register, followed by the bit
// combinations that have to be set for a given register file to be usable.
enum XCR0Flags {
    X87             = 0x01,
    XMM0_15         = 0x02,
    YMM0_15Hi128    = 0x04,
    BNDRegs         = 0x08,
    BNDCSR          = 0x10,
    OpMask          = 0x20,
    ZMM0_15Hi256    = 0x40,
    ZMM16_31        = 0x80,

    // combined states
    SSEState        = XMM0_15,
    AVXState        = SSEState | YMM0_15Hi128,
    AVX512State     = AVXState | OpMask | ZMM0_15Hi256 | ZMM16_31
};
/*
 * Returns "xcr0" corrected for operating systems that do not advertise their
 * AVX512 context-switching support in the XCR0 register.
 *
 * Such systems only allocate the AVX512 save area the first time an
 * instruction touching that state (one needing the EVEX prefix) is executed,
 * so XCR0 alone under-reports what is usable. The XFD (Extended Feature
 * Disable) register supersedes this scheme, but already-released OSes cannot
 * be changed.
 */
static quint64 adjustedXcr0(quint64 xcr0)
{
#ifdef Q_OS_DARWIN
    // Constants taken from <machine/cpu_capabilities.h> in xnu
    // <https://github.com/apple/darwin-xnu/blob/xnu-4903.221.2/osfmk/i386/cpu_capabilities.h>
    constexpr quint64 avx512fCapability = Q_UINT64_C(0x0000004000000000);
    constexpr quintptr commpageBase = sizeof(void *) > 4 ? Q_UINT64_C(0x00007fffffe00000) : 0xffff0000;
    constexpr quintptr capabilitiesAddress = commpageBase + 0x10;

    // the 64-bit capability word is read directly from that fixed address
    const quint64 capabilities = *reinterpret_cast<quint64 *>(capabilitiesAddress);
    if ((capabilities & avx512fCapability) != 0)
        return xcr0 | AVX512State;
#endif
    return xcr0;
}
/*
 * Returns the set of x86 CPU feature bits usable on this machine.
 *
 * The raw bits come from CPUID leaves 1 and 7; they are then filtered by the
 * XCR0 register (AVX/AVX512 are dropped when the corresponding register state
 * is not enabled) and by a sanity check of the RDRAND instruction.
 */
static quint64 detectProcessorFeatures()
{
    // feature groups that are dropped together when their state is not enabled
    static const quint64 AllAVX2 = CpuFeatureAVX2 | AllAVX512;
    static const quint64 AllAVX = CpuFeatureAVX | AllAVX2;

    const int maxLeaf = maxBasicCpuidSupported();
#if Q_PROCESSOR_X86 < 5
    if (maxLeaf < 1)
        return 0;
#else
    Q_ASSERT(maxLeaf >= 1);
#endif

    uint cpuidWords[X86CpuidMaxLeaf] = {};
    cpuidFeatures01(cpuidWords[Leaf1ECX], cpuidWords[Leaf1EDX]);
    if (maxLeaf >= 7)
        cpuidFeatures07_00(cpuidWords[Leaf7_0EBX], cpuidWords[Leaf7_0ECX], cpuidWords[Leaf7_0EDX]);

    // Translate CPUID bits into our feature bits: locator number idx says in
    // which word/bit of cpuidWords the feature with bit number idx + 1 lives.
    quint64 detected = 0;
    const uint locatorCount = sizeof(x86_locators) / sizeof(x86_locators[0]);
    for (uint idx = 0; idx != locatorCount; ++idx) {
        const uint wordIndex = x86_locators[idx] / 32;
        const uint bitMask = 1U << (x86_locators[idx] % 32);
        if (cpuidWords[wordIndex] & bitMask)
            detected |= Q_UINT64_C(1) << (idx + 1);
    }

    // Read XCR0, but only if leaf 1 ECX bit 27 says XGETBV is enabled
    quint64 xcr0 = 0;
    const bool xgetbvEnabled = (cpuidWords[Leaf1ECX] & (1u << 27)) != 0;
    if (xgetbvEnabled) {
        uint lowHalf = 0;
        uint highHalf = 0;
        xgetbv(0, lowHalf, highHalf);
        xcr0 = lowHalf;
        if (sizeof(XCR0Flags) > sizeof(lowHalf))
            xcr0 |= quint64(highHalf) << 32;
        xcr0 = adjustedXcr0(xcr0);
    }

    const bool ymmEnabled = (xcr0 & AVXState) == AVXState;
    const bool zmmEnabled = (xcr0 & AVX512State) == AVX512State;
    if (!ymmEnabled) {
        // support for YMM registers is disabled, disable all AVX
        detected &= ~AllAVX;
    } else if (!zmmEnabled) {
        // support for ZMM registers or mask registers is disabled, disable all AVX512
        detected &= ~AllAVX512;
    }

    // a misbehaving RDRAND takes RDSEED down with it
    if ((detected & CpuFeatureRDRND) && !checkRdrndWorks())
        detected &= ~(CpuFeatureRDRND | CpuFeatureRDSEED);

    return detected;
}
#elif defined(Q_PROCESSOR_MIPS_32)
#if defined(Q_OS_LINUX)
//
// Do not use QByteArray: it could use SIMD instructions itself at
// some point, thus creating a recursive dependency. Instead, use a
// QSimpleBuffer, which has the bare minimum needed to use memory
// dynamically and read lines from /proc/cpuinfo of arbitrary sizes.
//
// Minimal growable byte buffer built on realloc()/free().
//
// Invariant: "size" never exceeds "alloc". If an allocation fails, the buffer
// is reset to the empty state (data == nullptr, alloc == 0, size == 0) instead
// of keeping sizes that describe memory it does not own.
struct QSimpleBuffer
{
    static const int chunk_size = 256;  // allocation granularity, in bytes
    char *data;                         // storage, or nullptr while nothing is allocated
    unsigned alloc;                     // number of bytes allocated at "data"
    unsigned size;                      // number of bytes in use

    QSimpleBuffer() : data(nullptr), alloc(0), size(0) { }
    ~QSimpleBuffer() { ::free(data); }

    // Sets the used size to "newsize", growing the allocation when needed.
    // Growth always leaves at least one spare byte (rounds up to the next
    // multiple of chunk_size strictly above newsize). The buffer never shrinks.
    void resize(unsigned newsize)
    {
        if (newsize > alloc) {
            unsigned newalloc = chunk_size * ((newsize / chunk_size) + 1);
            if (newalloc < newsize)     // the rounding above wrapped around
                newalloc = newsize;
            // Do not overwrite "data" before knowing realloc() succeeded,
            // otherwise the old block would be leaked.
            char *grown = static_cast<char *>(::realloc(data, newalloc));
            if (!grown) {
                ::free(data);
                data = nullptr;
                alloc = 0;
                size = 0;
                return;
            }
            data = grown;
            alloc = newalloc;
        }
        size = newsize;
    }

    // Appends the first "appendsize" bytes of "other" to this buffer.
    void append(const QSimpleBuffer &other, unsigned appendsize)
    {
        const unsigned oldsize = size;
        resize(oldsize + appendsize);
        // nothing to copy, or the allocation failed and the buffer was reset
        if (appendsize == 0 || size != oldsize + appendsize)
            return;
        ::memcpy(data + oldsize, other.data, appendsize);
    }

    // Discards the first "amount" bytes, moving the remainder to the front.
    void popleft(unsigned amount)
    {
        if (amount >= size)
            return resize(0);
        size -= amount;
        ::memmove(data, data + amount, size);
    }

    // Returns the contents as a NUL-terminated string. The terminator is
    // stored just past the used bytes and is not counted in "size".
    char *cString()
    {
        if (size >= alloc) {
            // No room for the terminator: grow by one byte, then restore the
            // logical size so that the contents are unchanged.
            const unsigned used = size;
            resize(used + 1);
            if (!data) {
                // allocation failed; hand out a valid empty string
                static char empty[1] = { '\0' };
                return empty;
            }
            size = used;
        }
        data[size] = '\0';
        return data;
    }
};
//
// Reads one line from the file descriptor "fd" and appends it to "line",
// using the scratch "buffer" (which must be the same object for all reads
// done on the same file descriptor) to hold data that was read ahead.
//
// The trailing newline character ('\n') is NOT part of the returned line.
// If the file ends without a final newline, the remaining bytes are returned
// as the last line. On EOF (or read error) with nothing left, "line" is not
// modified; callers that clear "line" first therefore see line.size == 0.
// Note that an empty line in the file also yields line.size == 0.
//
static void bufReadLine(int fd, QSimpleBuffer &line, QSimpleBuffer &buffer)
{
    for (;;) {
        // memchr() must not be handed a null pointer, even for zero bytes
        char *newline = buffer.size
                ? static_cast<char *>(::memchr(buffer.data, '\n', buffer.size))
                : nullptr;
        if (newline) {
            unsigned piece_size = newline - buffer.data + 1;
            line.append(buffer, piece_size);
            buffer.popleft(piece_size);
            if (line.size)
                line.resize(line.size - 1);     // drop the '\n'
            return;
        }

        // make room for one more chunk without changing the amount in use
        if (buffer.size + QSimpleBuffer::chunk_size > buffer.alloc) {
            unsigned oldsize = buffer.size;
            buffer.resize(buffer.size + QSimpleBuffer::chunk_size);
            if (!buffer.data) {
                // out of memory: nothing can be read or returned
                buffer.size = 0;
                return;
            }
            buffer.size = oldsize;
        }

        ssize_t read_bytes =
                ::qt_safe_read(fd, buffer.data + buffer.size, QSimpleBuffer::chunk_size);
        if (read_bytes > 0) {
            buffer.size += read_bytes;
            continue;
        }

        // EOF or read error: hand out an unterminated last line, if any
        if (buffer.size && buffer.data) {
            line.append(buffer, buffer.size);
            buffer.resize(0);
        }
        return;
    }
}
//
// Checks if any line of /proc/cpuinfo that starts with "prefix" (followed by
// ':' or whitespace) contains "string" as a whole word, that is, preceded by
// whitespace and followed by whitespace or the end of the line.
// Returns false if the file cannot be opened.
//
static bool procCpuinfoContains(const char *prefix, const char *string)
{
    int cpuinfo_fd = ::qt_safe_open("/proc/cpuinfo", O_RDONLY);
    if (cpuinfo_fd == -1)
        return false;

    unsigned string_len = ::strlen(string);
    unsigned prefix_len = ::strlen(prefix);
    QSimpleBuffer line, buffer;
    bool present = false;
    while (!present) {
        line.resize(0);
        bufReadLine(cpuinfo_fd, line, buffer);
        if (!line.size)
            break;          // end of input, as reported by bufReadLine()

        // too short to hold the prefix, a separator and the string
        if (line.size <= prefix_len + string_len)
            continue;
        if (!::memchr(line.data, ':', line.size))
            continue;
        if (::strncmp(prefix, line.data, prefix_len) != 0)
            continue;

        // prefix matches, next character must be ':' or space
        // (<ctype.h> functions need the value of an unsigned char)
        const unsigned char separator = static_cast<unsigned char>(line.data[prefix_len]);
        if (separator != ':' && !::isspace(separator))
            continue;

        // Does it contain the string? Look at every occurrence, not only the
        // first one, and never inspect the byte before the start of the line.
        const char *text = line.cString();
        const char *found = ::strstr(text, string);
        while (found) {
            const unsigned char after = static_cast<unsigned char>(found[string_len]);
            const bool spaceBefore =
                    found != text && ::isspace(static_cast<unsigned char>(found[-1]));
            if (spaceBefore && (after == '\0' || ::isspace(after))) {
                present = true;
                break;
            }
            // stop at the terminator (only reachable with an empty "string")
            found = *found ? ::strstr(found + 1, string) : nullptr;
        }
    }

    ::qt_safe_close(cpuinfo_fd);
    return present;
}
#endif
// Returns the MIPS DSP feature bits (CpuFeatureDSP, CpuFeatureDSPR2).
//
// Features the compiler already targets (__mips_dsp, __mips_dsp_rev) are
// reported unconditionally; on Linux the remaining ones are looked up in
// /proc/cpuinfo.
static inline quint64 detectProcessorFeatures()
{
    // NOTE: MIPS 74K cores are the only ones supporting DSPr2.
    quint64 detected = 0;

#if defined __mips_dsp
    detected |= CpuFeatureDSP;
#  if defined __mips_dsp_rev && __mips_dsp_rev >= 2
    detected |= CpuFeatureDSPR2;
#  elif defined(Q_OS_LINUX)
    const bool is74K = procCpuinfoContains("cpu model", "MIPS 74Kc")
            || procCpuinfoContains("cpu model", "MIPS 74Kf");
    if (is74K)
        detected |= CpuFeatureDSPR2;
#  endif
#elif defined(Q_OS_LINUX)
    if (procCpuinfoContains("ASEs implemented", "dsp")) {
        detected |= CpuFeatureDSP;
        const bool is74K = procCpuinfoContains("cpu model", "MIPS 74Kc")
                || procCpuinfoContains("cpu model", "MIPS 74Kf");
        if (is74K)
            detected |= CpuFeatureDSPR2;
    }
#endif

    return detected;
}
#else
// Architectures without any detection code: the feature set is always empty.
static inline uint detectProcessorFeatures()
{
    const uint noFeatures = 0;
    return noFeatures;
}
#endif
// number of entries in features_indices, i.e. number of feature bits that have a name
static const int features_count = (sizeof features_indices) / (sizeof features_indices[0]);
// record what CPU features were enabled by default in this Qt build
static const quint64 minFeature = qCompilerCpuFeatures;
// Storage for the detected feature mask, written by qDetectCpuFeatures():
// a single 64-bit atomic where those are supported, otherwise two atomics
// holding the low ([0]) and high ([1]) 32 bits.
#ifdef Q_ATOMIC_INT64_IS_SUPPORTED
Q_CORE_EXPORT QBasicAtomicInteger<quint64> qt_cpu_features[1] = { Q_BASIC_ATOMIC_INITIALIZER(0) };
#else
Q_CORE_EXPORT QBasicAtomicInteger<unsigned> qt_cpu_features[2] = { Q_BASIC_ATOMIC_INITIALIZER(0), Q_BASIC_ATOMIC_INITIALIZER(0) };
#endif
/*
 * Detects the CPU features, applies the QT_NO_CPU_FEATURE environment
 * variable, verifies that every feature this Qt build was compiled to require
 * is present, and publishes the result in qt_cpu_features.
 *
 * QT_NO_CPU_FEATURE is a space-separated list of feature names; each listed
 * feature is removed from the detected set. Names are matched as whole words,
 * so that e.g. "avx2" does not also disable "avx".
 *
 * If a required feature is missing (and we are not running under Valgrind),
 * the missing features are printed to stderr and qFatal() is called.
 *
 * Returns the detected feature mask (without the QSimdInitialized flag that
 * is added to the stored value).
 */
quint64 qDetectCpuFeatures()
{
    auto minFeatureTest = minFeature;
#if defined(Q_OS_LINUX) && defined(Q_PROCESSOR_ARM_64)
    // Yocto hard-codes CRC32+AES on. Since they are unlikely to be used
    // automatically by compilers, we can just add runtime check.
    minFeatureTest &= ~(CpuFeatureAES|CpuFeatureCRC32);
#endif

    quint64 f = detectProcessorFeatures();

    QByteArray disable = qgetenv("QT_NO_CPU_FEATURE");
    if (!disable.isEmpty()) {
        // Surround the list with spaces: every entry of features_string
        // already starts with a space, so " name " only matches whole words.
        disable.prepend(' ');
        disable.append(' ');
        for (int i = 0; i < features_count; ++i) {
            const char *name = features_string + features_indices[i];
            if (!*name)
                continue;   // bit without a name (index 0)
            QByteArray needle(name);
            needle.append(' ');
            if (disable.contains(needle))
                f &= ~(Q_UINT64_C(1) << i);
        }
    }

#ifdef RUNNING_ON_VALGRIND
    bool runningOnValgrind = RUNNING_ON_VALGRIND;
#else
    bool runningOnValgrind = false;
#endif
    if (Q_UNLIKELY(!runningOnValgrind && minFeatureTest != 0 && (f & minFeatureTest) != minFeatureTest)) {
        quint64 missing = minFeatureTest & ~f;
        fprintf(stderr, "Incompatible processor. This Qt build requires the following features:\n ");
        for (int i = 0; i < features_count; ++i) {
            if (missing & (Q_UINT64_C(1) << i))
                fprintf(stderr, "%s", features_string + features_indices[i]);
        }
        fprintf(stderr, "\n");
        fflush(stderr);
        // the fatal message names the lowest-numbered missing feature
        qFatal("Aborted. Incompatible processor: missing feature 0x%llx -%s.", missing,
               features_string + features_indices[qCountTrailingZeroBits(missing)]);
    }

    qt_cpu_features[0].storeRelaxed(f | quint32(QSimdInitialized));
#ifndef Q_ATOMIC_INT64_IS_SUPPORTED
    // without 64-bit atomics the upper half is stored separately
    qt_cpu_features[1].storeRelaxed(f >> 32);
#endif
    return f;
}
void qDumpCPUFeatures()
{
quint64 features = qCpuFeatures() & ~quint64(QSimdInitialized);
printf("Processor features: ");
for (int i = 0; i < features_count; ++i) {
if (features & (Q_UINT64_C(1) << i))
printf("%s%s", features_string + features_indices[i],
minFeature & (Q_UINT64_C(1) << i) ? "[required]" : "");
}
if ((features = (qCompilerCpuFeatures & ~features))) {
printf("\n!!!!!!!!!!!!!!!!!!!!\n!!! Missing required features:");
for (int i = 0; i < features_count; ++i) {
if (features & (Q_UINT64_C(1) << i))
printf("%s", features_string + features_indices[i]);
}
printf("\n!!! Applications will likely crash with \"Invalid Instruction\"\n!!!!!!!!!!!!!!!!!!!!");
}
puts("");
}
#if defined(Q_PROCESSOR_X86) && QT_COMPILER_SUPPORTS_HERE(RDRND)
# ifdef Q_PROCESSOR_X86_64
# define _rdrandXX_step _rdrand64_step
# define _rdseedXX_step _rdseed64_step
# else
# define _rdrandXX_step _rdrand32_step
# define _rdseedXX_step _rdseed32_step
# endif
// The parameter to _rdrand64_step & _rdseed64_step is unsigned long long for
// Clang and GCC but unsigned __int64 for MSVC and ICC, which is unsigned long
// long on Windows, but unsigned long on Linux.
namespace {
template <typename F> struct ExtractParameter;
template <typename T> struct ExtractParameter<int (T *)> { using Type = T; };
using randuint = ExtractParameter<decltype(_rdrandXX_step)>::Type;
}
# if QT_COMPILER_SUPPORTS_HERE(RDSEED)
// Fills [ptr, end) with RDSEED output, stopping at the first failure.
// Returns a pointer just past the last word that was written.
static QT_FUNCTION_TARGET(RDSEED) unsigned *qt_random_rdseed(unsigned *ptr, unsigned *end) noexcept
{
    // Unlike for the RDRAND code below, the Intel whitepaper describing the
    // use of the RDSEED instruction indicates we should not retry in a loop.
    // If the independent bit generator used by RDSEED is out of entropy, it
    // may take time to replenish.
    // https://software.intel.com/en-us/articles/intel-digital-random-number-generator-drng-software-implementation-guide
    const auto wordsPerStep = sizeof(randuint) / sizeof(*ptr);

    // full-width steps first
    for ( ; ptr + wordsPerStep <= end; ptr += wordsPerStep) {
        if (_rdseedXX_step(reinterpret_cast<randuint *>(ptr)) == 0)
            return ptr;
    }

    // then at most one 32-bit step for a leftover word
    if (sizeof(*ptr) != sizeof(randuint) && ptr != end) {
        if (_rdseed32_step(ptr) != 0)
            ++ptr;
    }
    return ptr;
}
# else
// Stand-in used when RDSEED code cannot be compiled here: it produces nothing
// and reports that by returning the start pointer unchanged.
static unsigned *qt_random_rdseed(unsigned *ptr, unsigned * /* end */)
{
    unsigned *const nothingWritten = ptr;
    return nothingWritten;
}
# endif
// Fills [ptr, end) with RDRAND output. Failed steps are retried, with a
// budget of 10 attempts shared by the whole call; generation stops once the
// budget is used up. Returns a pointer just past the last word written.
static QT_FUNCTION_TARGET(RDRND) unsigned *qt_random_rdrnd(unsigned *ptr, unsigned *end) noexcept
{
    int retries = 10;
    const auto wordsPerStep = sizeof(randuint) / sizeof(*ptr);

    // full-width steps first
    while (ptr + wordsPerStep <= end) {
        if (_rdrandXX_step(reinterpret_cast<randuint *>(ptr))) {
            ptr += wordsPerStep;
            continue;
        }
        if (--retries == 0)
            return ptr;
    }

    // then at most one 32-bit word, if one is left over
    if (sizeof(*ptr) != sizeof(randuint) && ptr != end) {
        for (;;) {
            if (_rdrand32_step(ptr)) {
                ++ptr;
                break;
            }
            if (--retries == 0)
                break;
        }
    }
    return ptr;
}
static QT_FUNCTION_TARGET(RDRND) Q_DECL_COLD_FUNCTION bool checkRdrndWorks() noexcept
{
    /*
     * Sanity check for the hardware random generator.
     *
     * Some AMD CPUs (e.g. AMD A4-6250J and AMD Ryzen 3000-series) have a
     * failing random generation instruction, which always returns
     * 0xffffffff, even when generation was "successful".
     *
     * We ask for four 32-bit words and report the generator as broken when
     * all the words we received (at least three are needed) are equal.
     *
     * https://bugreports.qt.io/browse/QTBUG-69423
     */
    constexpr qsizetype TestBufferSize = 4;
    unsigned samples[TestBufferSize] = {};

    unsigned *const samplesEnd = qt_random_rdrnd(samples, samples + TestBufferSize);
    const qsizetype produced = samplesEnd - samples;
    if (produced < 3) {
        // Not enough data to decide either way. Treat the generator as
        // unusable, but stay silent about it.
        return false;
    }

    // the fourth word only takes part in the comparison if it was generated
    const bool firstThreeEqual = samples[0] == samples[1] && samples[0] == samples[2];
    const bool fourthAgrees = produced < TestBufferSize || samples[0] == samples[3];
    if (firstThreeEqual && fourthAgrees) {
        fprintf(stderr, "WARNING: CPU random generator seem to be failing, "
                        "disabling hardware random number generation\n"
                        "WARNING: RDRND generated:");
        for (const unsigned *sample = samples; sample < samplesEnd; ++sample)
            fprintf(stderr, " 0x%x", *sample);
        fprintf(stderr, "\n");
        return false;
    }

    // the values differ: the generator looks healthy
    return true;
}
// Fills "buffer", treated as an array of "count" unsigned ints, with
// hardware-generated random data: RDSEED first when the CPU has it, RDRAND
// for whatever is left. Returns the number of unsigned ints actually written.
QT_FUNCTION_TARGET(RDRND) qsizetype qRandomCpu(void *buffer, qsizetype count) noexcept
{
    unsigned *const begin = reinterpret_cast<unsigned *>(buffer);
    unsigned *const end = begin + count;

    unsigned *cursor = begin;
    if (qCpuHasFeature(RDSEED))
        cursor = qt_random_rdseed(cursor, end);

    // fill the buffer with RDRND if RDSEED didn't
    cursor = qt_random_rdrnd(cursor, end);

    return cursor - begin;
}
#elif defined(Q_PROCESSOR_X86) && !defined(Q_OS_NACL) && !defined(Q_PROCESSOR_ARM)
// RDRAND code cannot be compiled in this configuration, so the generator is
// always reported as not working.
static bool checkRdrndWorks() noexcept
{
    const bool rdrndUsable = false;
    return rdrndUsable;
}
#endif // Q_PROCESSOR_X86 && RDRND
QT_END_NAMESPACE