qHash: implement an AES hasher for QLatin1StringView

It's the same aeshash() as before, except we're passing a template
parameter to indicate whether to read half and then zero-extend the
data. That is, it will perform a conversion from Latin1 on the fly.

When running in zero-extending mode, the length parameters are actually
doubled (counting the number of UTF-16 code units) and we then divide
again by 2 when advancing.

The implementation should have the following performance
characteristics:
* QLatin1StringView now will be roughly half as fast as it was in Qt 6.7
* QLatin1StringView now will be roughly as fast as QStringView

For the aeshash128() in default builds of QtCore (will use SSE4.1), the
long loop (32 characters or more) is:

      QStringView                             QLatin1StringView
    movdqu -0x20(%rax),%xmm4       |        pmovzxbw -0x10(%rdx),%xmm2
    movdqu -0x10(%rax),%xmm5       |        pmovzxbw -0x8(%rdx),%xmm3
    add    $0x20,%rax              |        add    $0x10,%rdx
    pxor   %xmm4,%xmm0             |        pxor   %xmm2,%xmm0
    pxor   %xmm5,%xmm1             |        pxor   %xmm3,%xmm1
    aesenc %xmm0,%xmm0                      aesenc %xmm0,%xmm0
    aesenc %xmm1,%xmm1                      aesenc %xmm1,%xmm1
    aesenc %xmm0,%xmm0                      aesenc %xmm0,%xmm0
    aesenc %xmm1,%xmm1                      aesenc %xmm1,%xmm1

The number of instructions is identical, but there are actually 2 more
uops per iteration. LLVM-MCA simulation shows this should execute in the
same number of cycles on older CPUs that do not have support for VAES
(see <https://analysis.godbolt.org/z/x95Mrfrf7>).

For the VAES version in aeshash256() and the AVX10 version in
aeshash256_256():

      QStringView                             QLatin1StringView
    vpxor  -0x40(%rax),%ymm1,%ym   |        vpmovzxbw -0x20(%rax),%ymm3
    vpxor  -0x20(%rax),%ymm0,%ym   |        vpmovzxbw -0x10(%rax),%ymm2
    add    $0x40,%rax              |        add    $0x20,%rax
                                   |        vpxor  %ymm3,%ymm0,%ymm0
                                   |        vpxor  %ymm2,%ymm1,%ymm1
    vaesenc %ymm1,%ymm1,%ymm1      <
    vaesenc %ymm0,%ymm0,%ymm0               vaesenc %ymm0,%ymm0,%ymm0
    vaesenc %ymm1,%ymm1,%ymm1               vaesenc %ymm1,%ymm1,%ymm1
    vaesenc %ymm0,%ymm0,%ymm0               vaesenc %ymm0,%ymm0,%ymm0
                                   >        vaesenc %ymm1,%ymm1,%ymm1

In this case, the increase in number of instructions matches the
increase in number of uops. The LLVM-MCA simulation says that the
QLatin1StringView version is faster at 11 cycles/iteration vs 14 cyc/it
(see <https://analysis.godbolt.org/z/1Gv1coz13>), but that can't be
right.

Measured performance of CPU cycles, on an Intel Core i9-7940X (Skylake,
no VAES support), normalized on the QString performance (QByteArray is
used as a stand-in for the performance in Qt 6.7):

                        aeshash              |  siphash
                QByteArray  QL1SV   QString     QByteArray  QString
dictionary      94.5%       79.7%   100.0%      150.5%*     159.8%
paths-small     90.2%       93.2%   100.0%      202.8%      290.3%
uuids           81.8%       100.7%  100.0%      215.2%      350.7%
longstrings     42.5%       100.8%  100.0%      185.7%      353.2%
numbers         95.5%       77.9%   100.0%      155.3%*     164.5%

On an Intel Core i7-1165G7 (Tiger Lake, capable of VAES and AVX512VL):

                        aeshash              |  siphash
                QByteArray  QL1SV   QString     QByteArray  QString
dictionary      90.0%       91.1%   100.0%      103.3%*     157.1%
paths-small     99.4%       104.8%  100.0%      237.5%      358.0%
uuids           88.5%       117.6%  100.0%      274.5%      461.7%
longstrings     57.4%       111.2%  100.0%      503.0%      974.3%
numbers         90.6%       89.7%   100.0%      98.7%*      149.9%

On an Intel 4th Generation Xeon Scalable Platinum (Sapphire Rapids, same
Golden Cove core as Alder Lake):

                        aeshash              |  siphash
                QByteArray  QL1SV   QString     QByteArray  QString
dictionary      89.9%       102.1%  100.0%      158.1%*     172.7%
paths-small     78.0%       89.4%   100.0%      159.4%      258.0%
uuids           109.1%      107.9%  100.0%      279.0%      496.3%
longstrings     52.1%       112.4%  100.0%      564.4%      1078.3%
numbers         85.8%       98.9%   100.0%      152.6%*     190.4%

* dictionary contains very short entries (6 characters)
* paths-small contains strings of varying length, but very few over 32
* uuids-list contains fixed-length strings (38 characters)
* longstrings is the same but 304 characters
* numbers also contains a lot of very short strings (1 to 6 chars)

What this shows:
* For short strings, the performance difference is negligible between
  all three
* For longer strings, QLatin1StringView now costs between 7 and 17% more
  than QString on the tested machines instead of up to ~50% less, except on
  the older machine (where I think the main QString hashing is suffering
  from memory bandwidth limitations)
* The AES hash implementation is anywhere from 1.6 to 11x faster than
  Siphash
* Murmurhash (marked with asterisk) is much faster than Siphash, but it
  only managed to beat the AES hash in one test

Change-Id: I664b9f014ffc48cbb49bfffd17b045c1811ac0ed
Reviewed-by: Qt CI Bot <qt_ci_bot@qt-project.org>
Reviewed-by: Mårten Nordheim <marten.nordheim@qt.io>
This commit is contained in:
Thiago Macieira 2024-02-02 22:15:56 -08:00
parent 970aad5418
commit 55959aefab
5 changed files with 178 additions and 64 deletions

View File

@ -616,8 +616,39 @@ namespace {
// the scrambling round (step 3 in [1]) because it's just very good at
// spreading the bits around.
//
// Note on Latin-1 hashing (ZX == ByteToWord): for simplicity of the
// algorithm, we pass sizes equivalent to the UTF-16 content (ZX == None).
// That means we must multiply by 2 on entry, divide by 2 on pointer
// advancing, and load half as much data from memory (though we produce
// exactly as much data in registers). The compilers appear to optimize
// this out.
//
// [1] https://en.wikipedia.org/wiki/Advanced_Encryption_Standard#High-level_description_of_the_algorithm
template <ZeroExtension ZX, typename T> static const T *advance(const T *ptr, ptrdiff_t n)
{
if constexpr (ZX == None)
return ptr + n;
// see note above on ZX == ByteToWord hashing
auto p = reinterpret_cast<const uchar *>(ptr);
n *= sizeof(T);
return reinterpret_cast<const T *>(p + n/2);
}
template <ZeroExtension> static __m128i loadu128(const void *ptr);
template <> Q_ALWAYS_INLINE QT_FUNCTION_TARGET(AES) __m128i loadu128<None>(const void *ptr)
{
return _mm_loadu_si128(reinterpret_cast<const __m128i *>(ptr));
}
template <> Q_ALWAYS_INLINE QT_FUNCTION_TARGET(AES) __m128i loadu128<ByteToWord>(const void *ptr)
{
// use a MOVQ followed by PMOVZXBW
// the compiler usually combines them as a single, loading PMOVZXBW
__m128i data = _mm_loadl_epi64(static_cast<const __m128i *>(ptr));
return _mm_cvtepu8_epi16(data);
}
// hash 16 bytes, running 3 scramble rounds of AES on itself (like label "final1")
static void Q_ALWAYS_INLINE QT_FUNCTION_TARGET(AES) QT_VECTORCALL
hash16bytes(__m128i &state0, __m128i data)
@ -629,11 +660,12 @@ namespace {
}
// hash twice 16 bytes, running 2 scramble rounds of AES on itself
template <ZeroExtension ZX>
static void QT_FUNCTION_TARGET(AES) QT_VECTORCALL
hash2x16bytes(__m128i &state0, __m128i &state1, const __m128i *src0, const __m128i *src1)
{
__m128i data0 = _mm_loadu_si128(src0);
__m128i data1 = _mm_loadu_si128(src1);
__m128i data0 = loadu128<ZX>(src0);
__m128i data1 = loadu128<ZX>(src1);
state0 = _mm_xor_si128(data0, state0);
state1 = _mm_xor_si128(data1, state1);
state0 = _mm_aesenc_si128(state0, state0);
@ -680,16 +712,18 @@ Q_ALWAYS_INLINE __m128i AESHashSeed::state1() const
}
}
template <ZeroExtension ZX>
static size_t QT_FUNCTION_TARGET(AES) QT_VECTORCALL
aeshash128_16to32(__m128i state0, __m128i state1, const __m128i *src, const __m128i *srcend)
{
{
if (src + 1 < srcend) {
const __m128i *src2 = advance<ZX>(srcend, -1);
if (advance<ZX>(src, 1) < srcend) {
// epilogue: between 16 and 31 bytes
hash2x16bytes(state0, state1, src, srcend - 1);
hash2x16bytes<ZX>(state0, state1, src, src2);
} else if (src != srcend) {
// epilogue: between 1 and 16 bytes, overlap with the end
__m128i data = _mm_loadu_si128(srcend - 1);
__m128i data = loadu128<ZX>(src2);
hash16bytes(state0, data);
}
@ -700,8 +734,21 @@ aeshash128_16to32(__m128i state0, __m128i state1, const __m128i *src, const __m1
return mm_cvtsi128_sz(state0);
}
// load all 16 bytes and mask off the bytes past the end of the source
static const qint8 maskarray[] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
// load 16 bytes ending at the data end, then shuffle them to the beginning
static const qint8 shufflecontrol[] = {
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
};
template <ZeroExtension ZX>
static size_t QT_FUNCTION_TARGET(AES) QT_VECTORCALL
aeshash128_lt16(__m128i state0, const uchar *p, size_t len)
aeshash128_lt16(__m128i state0, const __m128i *src, const __m128i *srcend, size_t len)
{
if (len) {
// We're going to load 16 bytes and mask zero the part we don't care
@ -712,25 +759,15 @@ aeshash128_lt16(__m128i state0, const uchar *p, size_t len)
constexpr quintptr PageSize = 4096;
__m128i data;
if ((quintptr(p) & (PageSize / 2)) == 0) {
if ((quintptr(src) & (PageSize / 2)) == 0) {
// lower half of the page:
// load all 16 bytes and mask off the bytes past the end of the source
static const qint8 maskarray[] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
__m128i mask = _mm_loadu_si128(reinterpret_cast<const __m128i *>(maskarray + 15 - len));
data = _mm_loadu_si128(reinterpret_cast<const __m128i *>(p));
data = loadu128<ZX>(src);
data = _mm_and_si128(data, mask);
} else {
// upper half of the page:
// load 16 bytes ending at the data end, then shuffle them to the beginning
static const qint8 shufflecontrol[] = {
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
};
__m128i control = _mm_loadu_si128(reinterpret_cast<const __m128i *>(shufflecontrol + 15 - len));
data = _mm_loadu_si128(reinterpret_cast<const __m128i *>(p + len) - 1);
data = loadu128<ZX>(advance<ZX>(srcend, -1));
data = _mm_shuffle_epi8(data, control);
}
@ -739,24 +776,45 @@ aeshash128_lt16(__m128i state0, const uchar *p, size_t len)
return mm_cvtsi128_sz(state0);
}
template <ZeroExtension ZX>
static size_t QT_FUNCTION_TARGET(AES) QT_VECTORCALL
aeshash128_ge32(__m128i state0, __m128i state1, const __m128i *src, const __m128i *srcend)
{
// main loop: scramble two 16-byte blocks
for ( ; src + 2 < srcend; src += 2)
hash2x16bytes(state0, state1, src, src + 1);
for ( ; advance<ZX>(src, 2) < srcend; src = advance<ZX>(src, 2))
hash2x16bytes<ZX>(state0, state1, src, advance<ZX>(src, 1));
return aeshash128_16to32(state0, state1, src, srcend);
return aeshash128_16to32<ZX>(state0, state1, src, srcend);
}
# if QT_COMPILER_SUPPORTS_HERE(VAES)
template <ZeroExtension> static __m256i loadu256(const void *ptr);
template <> Q_ALWAYS_INLINE QT_FUNCTION_TARGET(VAES) __m256i loadu256<None>(const void *ptr)
{
return _mm256_loadu_si256(reinterpret_cast<const __m256i *>(ptr));
}
template <> Q_ALWAYS_INLINE QT_FUNCTION_TARGET(VAES) __m256i loadu256<ByteToWord>(const void *ptr)
{
// VPMOVZXBW xmm, ymm
__m128i data = _mm_loadu_si128(reinterpret_cast<const __m128i *>(ptr));
return _mm256_cvtepu8_epi16(data);
}
template <ZeroExtension ZX>
static size_t QT_FUNCTION_TARGET(VAES_AVX512) QT_VECTORCALL
aeshash256_lt32_avx256(__m256i state0, const uchar *p, size_t len)
{
__m128i state0_128 = _mm256_castsi256_si128(state0);
if (len) {
__mmask32 mask = _bzhi_u32(-1, unsigned(len));
__m256i data = _mm256_maskz_loadu_epi8(mask, p);
__m256i data;
if constexpr (ZX == None) {
__mmask32 mask = _bzhi_u32(-1, unsigned(len));
data = _mm256_maskz_loadu_epi8(mask, p);
} else {
__mmask16 mask = _bzhi_u32(-1, unsigned(len) / 2);
__m128i data0 = _mm_maskz_loadu_epi8(mask, p);
data = _mm256_cvtepu8_epi16(data0);
}
__m128i data0 = _mm256_castsi256_si128(data);
if (len >= sizeof(__m128i)) {
state0 = _mm256_xor_si256(state0, data);
@ -776,8 +834,9 @@ aeshash256_lt32_avx256(__m256i state0, const uchar *p, size_t len)
return mm_cvtsi128_sz(state0_128);
}
template <ZeroExtension ZX>
static size_t QT_FUNCTION_TARGET(VAES) QT_VECTORCALL
aeshash256_ge32(__m256i state0, const uchar *p, size_t len)
aeshash256_ge32(__m256i state0, const __m128i *s, const __m128i *end, size_t len)
{
static const auto hash32bytes = [](__m256i &state0, __m256i data) QT_FUNCTION_TARGET(VAES) {
state0 = _mm256_xor_si256(state0, data);
@ -787,10 +846,10 @@ aeshash256_ge32(__m256i state0, const uchar *p, size_t len)
};
// hash twice 32 bytes, running 2 scramble rounds of AES on itself
const auto hash2x32bytes = [](__m256i &state0, __m256i &state1, const __m256i *src0,
const __m256i *src1) QT_FUNCTION_TARGET(VAES) {
__m256i data0 = _mm256_loadu_si256(src0);
__m256i data1 = _mm256_loadu_si256(src1);
const auto hash2x32bytes = [](__m256i &state0, __m256i &state1, const void *src0,
const void *src1) QT_FUNCTION_TARGET(VAES) {
__m256i data0 = loadu256<ZX>(src0);
__m256i data1 = loadu256<ZX>(src1);
state0 = _mm256_xor_si256(data0, state0);
state1 = _mm256_xor_si256(data1, state1);
state0 = _mm256_aesenc_epi128(state0, state0);
@ -799,21 +858,22 @@ aeshash256_ge32(__m256i state0, const uchar *p, size_t len)
state1 = _mm256_aesenc_epi128(state1, state1);
};
const __m256i *src = reinterpret_cast<const __m256i *>(p);
const __m256i *srcend = reinterpret_cast<const __m256i *>(p + len);
const __m256i *src = reinterpret_cast<const __m256i *>(s);
const __m256i *srcend = reinterpret_cast<const __m256i *>(end);
__m256i state1 = _mm256_aesenc_epi128(state0, mm256_set1_epz(len));
// main loop: scramble two 32-byte blocks
for ( ; src + 2 < srcend; src += 2)
hash2x32bytes(state0, state1, src, src + 1);
for ( ; advance<ZX>(src, 2) < srcend; src = advance<ZX>(src, 2))
hash2x32bytes(state0, state1, src, advance<ZX>(src, 1));
if (src + 1 < srcend) {
const __m256i *src2 = advance<ZX>(srcend, -1);
if (advance<ZX>(src, 1) < srcend) {
// epilogue: between 32 and 63 bytes
hash2x32bytes(state0, state1, src, srcend - 1);
hash2x32bytes(state0, state1, src, src2);
} else if (src != srcend) {
// epilogue: between 1 and 32 bytes, overlap with the end
__m256i data = _mm256_loadu_si256(srcend - 1);
__m256i data = loadu256<ZX>(src2);
hash32bytes(state0, data);
}
@ -826,59 +886,69 @@ aeshash256_ge32(__m256i state0, const uchar *p, size_t len)
return mm_cvtsi128_sz(_mm_xor_si128(low, high));
}
template <ZeroExtension ZX>
static size_t QT_FUNCTION_TARGET(VAES)
aeshash256(const uchar *p, size_t len, size_t seed, size_t seed2) noexcept
{
AESHashSeed state(seed, seed2);
auto src = reinterpret_cast<const __m128i *>(p);
const auto srcend = reinterpret_cast<const __m128i *>(p + len);
const auto srcend = reinterpret_cast<const __m128i *>(advance<ZX>(p, len));
if (len < sizeof(__m128i))
return aeshash128_lt16(state.state0, p, len);
return aeshash128_lt16<ZX>(state.state0, src, srcend, len);
if (len <= sizeof(__m256i))
return aeshash128_16to32(state.state0, state.state1(), src, srcend);
return aeshash128_16to32<ZX>(state.state0, state.state1(), src, srcend);
return aeshash256_ge32(state.state0_256(), p, len);
return aeshash256_ge32<ZX>(state.state0_256(), src, srcend, len);
}
template <ZeroExtension ZX>
static size_t QT_FUNCTION_TARGET(VAES_AVX512)
aeshash256_avx256(const uchar *p, size_t len, size_t seed, size_t seed2) noexcept
{
AESHashSeed state(seed, seed2);
if (len <= sizeof(__m256i))
return aeshash256_lt32_avx256(state.state0_256(), p, len);
auto src = reinterpret_cast<const __m128i *>(p);
const auto srcend = reinterpret_cast<const __m128i *>(advance<ZX>(p, len));
return aeshash256_ge32(state.state0_256(), p, len);
if (len <= sizeof(__m256i))
return aeshash256_lt32_avx256<ZX>(state.state0_256(), p, len);
return aeshash256_ge32<ZX>(state.state0_256(), src, srcend, len);
}
# endif // VAES
template <ZeroExtension ZX>
static size_t QT_FUNCTION_TARGET(AES)
aeshash128(const uchar *p, size_t len, size_t seed, size_t seed2) noexcept
{
AESHashSeed state(seed, seed2);
auto src = reinterpret_cast<const __m128i *>(p);
const auto srcend = reinterpret_cast<const __m128i *>(p + len);
const auto srcend = reinterpret_cast<const __m128i *>(advance<ZX>(p, len));
if (len < sizeof(__m128i))
return aeshash128_lt16(state.state0, p, len);
return aeshash128_lt16<ZX>(state.state0, src, srcend, len);
if (len <= sizeof(__m256i))
return aeshash128_16to32(state.state0, state.state1(), src, srcend);
return aeshash128_16to32<ZX>(state.state0, state.state1(), src, srcend);
return aeshash128_ge32(state.state0, state.state1(), src, srcend);
return aeshash128_ge32<ZX>(state.state0, state.state1(), src, srcend);
}
template <ZeroExtension ZX = None>
static size_t aeshash(const uchar *p, size_t len, size_t seed, size_t seed2) noexcept
{
if constexpr (ZX == ByteToWord)
len *= 2; // see note above on ZX == ByteToWord hashing
# if QT_COMPILER_SUPPORTS_HERE(VAES)
if (qCpuHasFeature(VAES)) {
if (qCpuHasFeature(AVX512VL))
return aeshash256_avx256(p, len, seed, seed2);
return aeshash256(p, len, seed, seed2);
return aeshash256_avx256<ZX>(p, len, seed, seed2);
return aeshash256<ZX>(p, len, seed, seed2);
}
# endif
return aeshash128(p, len, seed, seed2);
return aeshash128<ZX>(p, len, seed, seed2);
}
#endif // x86 AESNI
@ -1090,6 +1160,10 @@ size_t qHash(QLatin1StringView key, size_t seed) noexcept
if (seed)
seed2 = qt_qhash_seed.currentSeed(1);
#if defined(AESHASH)
if (seed && qCpuHasFeature(AES) && qCpuHasFeature(SSE4_2))
return aeshash<ByteToWord>(data, size, seed, seed2);
#endif
return qHashBits_fallback<ByteToWord>(data, size, seed, seed2);
}

View File

@ -289,10 +289,12 @@ void tst_QHashFunctions::stringConsistency_data()
QTest::newRow("null") << QString();
QTest::newRow("empty") << "";
QTest::newRow("withnull") << QStringLiteral("A\0z");
QTest::newRow("short-ascii") << "Hello";
QTest::newRow("short-ascii") << "Hello"; // 10 bytes
QTest::newRow("medium-ascii") << "Hello, World"; // 24 bytes
QTest::newRow("long-ascii") << QStringLiteral("abcdefghijklmnopqrstuvxyz").repeated(16);
QTest::newRow("short-latin1") << "Bokmål";
QTest::newRow("medium-latin1") << "Det går bra!"; // 24 bytes
QTest::newRow("long-latin1")
<< R"(Alle mennesker er født frie og med samme menneskeverd og menneskerettigheter.
De er utstyrt med fornuft og samvittighet og bør handle mot hverandre i brorskapets ånd.)";
@ -327,8 +329,6 @@ void tst_QHashFunctions::stringConsistency()
QLatin1StringView l1sv(l1ba.data(), l1ba.size());
#ifdef Q_PROCESSOR_ARM
// zero-extending aeshash not implemented on ARM
#elif defined(Q_PROCESSOR_X86)
// zero-extending aeshash not implemented on x86
#else
if (value == l1sv)
QCOMPARE(qHash(l1sv, seed), qHash(value, seed));

View File

@ -5,7 +5,7 @@
QT_BEGIN_NAMESPACE
size_t qHash(const Qt4String &str)
size_t qHash(const Qt4String &str, size_t /* never used */)
{
qsizetype n = str.size();
const QChar *p = str.unicode();
@ -40,7 +40,7 @@ size_t qHash(const Qt50String &key, size_t seed)
// Still, we can avoid writing the multiplication as "(h << 5) - h"
// -- the compiler will turn it into a shift and an addition anyway
// (for instance, gcc 4.4 does that even at -O0).
size_t qHash(const JavaString &str)
size_t qHash(const JavaString &str, size_t /* never used */)
{
const auto *p = reinterpret_cast<const char16_t *>(str.constData());
const qsizetype len = str.size();

View File

@ -13,6 +13,8 @@
#include <QUuid>
#include <QTest>
static constexpr quint64 RandomSeed32 = 1045982819;
static constexpr quint64 RandomSeed64 = QtPrivate::QHashCombine{}(RandomSeed32, RandomSeed32);
class tst_QHash : public QObject
{
@ -31,6 +33,8 @@ private slots:
void hashing_current_data() { data(); }
void hashing_current() { hashing_template<QString>(); }
void hashing_qbytearray_data() { data(); }
void hashing_qbytearray() { hashing_template<QByteArray>(); }
void hashing_qt50_data() { data(); }
void hashing_qt50() { hashing_template<Qt50String>(); }
void hashing_qt4_data() { data(); }
@ -38,15 +42,25 @@ private slots:
void hashing_javaString_data() { data(); }
void hashing_javaString() { hashing_template<JavaString>(); }
void hashing_nonzero_current_data() { data(); }
void hashing_nonzero_current() { hashing_nonzero_template<QString>(); }
void hashing_nonzero_qbytearray_data() { data(); }
void hashing_nonzero_qbytearray() { hashing_nonzero_template<QByteArray>(); }
void hashing_nonzero_qlatin1string_data() { data(); }
void hashing_nonzero_qlatin1string() { hashing_nonzero_template<OwningLatin1String>(); }
private:
void data();
template <typename String> void qhash_template();
template <typename String> void hashing_template();
template <typename String, size_t Seed = 0> void hashing_template();
template <typename String> void hashing_nonzero_template()
{ hashing_template<String, size_t(RandomSeed64)>(); }
QStringList smallFilePaths;
QStringList uuids;
QStringList dict;
QStringList numbers;
QStringList longstrings;
};
///////////////////// QHash /////////////////////
@ -68,10 +82,12 @@ void tst_QHash::initTestCase()
// guaranteed to be completely random, generated by http://xkcd.com/221/
QUuid ns = QUuid("{f43d2ef3-2fe9-4563-a6f5-5a0100c2d699}");
uuids.reserve(smallFilePaths.size());
longstrings.reserve(smallFilePaths.size());
foreach (const QString &path, smallFilePaths)
uuids.append(QUuid::createUuidV5(ns, path).toString());
for (qsizetype i = 0; i < uuids.size(); ++i)
longstrings.append(uuids.at(i).repeated(8));
// lots of strings with alphabetical characters, vaguely reminiscent of
// a dictionary.
@ -112,6 +128,7 @@ void tst_QHash::data()
QTest::addColumn<QStringList>("items");
QTest::newRow("paths-small") << smallFilePaths;
QTest::newRow("uuids-list") << uuids;
QTest::newRow("longstrings-list") << longstrings;
QTest::newRow("dictionary") << dict;
QTest::newRow("numbers") << numbers;
}
@ -132,19 +149,30 @@ template <typename String> void tst_QHash::qhash_template()
}
}
template <typename String> void tst_QHash::hashing_template()
template <typename String, size_t Seed> void tst_QHash::hashing_template()
{
// just the hashing function
QFETCH(QStringList, items);
QList<String> realitems;
realitems.reserve(items.size());
foreach (const QString &s, items)
realitems.append(s);
foreach (const QString &s, items) {
if constexpr (std::is_same_v<QString::value_type, typename String::value_type>) {
realitems.append(s);
} else if constexpr (sizeof(typename String::value_type) == 1) {
realitems.append(String(s.toLatin1()));
}
}
QBENCHMARK {
for (int i = 0, n = realitems.size(); i != n; ++i)
(void)qHash(realitems.at(i));
for (int i = 0, n = realitems.size(); i != n; ++i) {
volatile size_t h = qHash(realitems.at(i), Seed);
(void)h;
#ifdef Q_CC_GNU
// "use" h
asm ("" : "+r" (h));
#endif
}
}
}

View File

@ -1,8 +1,20 @@
// Copyright (C) 2016 The Qt Company Ltd.
// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only
#include <QHashFunctions>
#include <QString>
struct OwningLatin1String : QByteArray
{
OwningLatin1String() = default;
OwningLatin1String(const QByteArray &a) : QByteArray(a) {}
OwningLatin1String(QByteArray &&a) : QByteArray(std::move(a)) {}
};
QT_BEGIN_NAMESPACE
inline size_t qHash(const OwningLatin1String &s, size_t seed = 0)
{ return qHash(QLatin1StringView(s), seed); }
QT_END_NAMESPACE
struct Qt4String : QString
{
Qt4String() {}
@ -10,7 +22,7 @@ struct Qt4String : QString
};
QT_BEGIN_NAMESPACE
size_t qHash(const Qt4String &);
size_t qHash(const Qt4String &, size_t = 0);
QT_END_NAMESPACE
struct Qt50String : QString
@ -31,6 +43,6 @@ struct JavaString : QString
};
QT_BEGIN_NAMESPACE
size_t qHash(const JavaString &);
size_t qHash(const JavaString &, size_t = 0);
QT_END_NAMESPACE