From 55959aefab1a190435dfacfc2a136ce3314d423c Mon Sep 17 00:00:00 2001 From: Thiago Macieira Date: Fri, 2 Feb 2024 22:15:56 -0800 Subject: [PATCH] qHash: implement an AES hasher for QLatin1StringView MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's the same aeshash() as before, except we're passing a template parameter to indicate whether to read half and then zero-extend the data. That is, it will perform a conversion from Latin1 on the fly. When running in zero-extending mode, the length parameters are actually doubled (counting the number of UTF-16 code units) and we then divide again by 2 when advancing. The implementation should have the following performance characteristics: * QLatin1StringView now will be roughly half as fast as Qt 6.7 * QLatin1StringView now will be roughly as fast as QStringView For the aeshash128() in default builds of QtCore (will use SSE4.1), the long loop (32 characters or more) is: QStringView QLatin1StringView movdqu -0x20(%rax),%xmm4 | pmovzxbw -0x10(%rdx),%xmm2 movdqu -0x10(%rax),%xmm5 | pmovzxbw -0x8(%rdx),%xmm3 add $0x20,%rax | add $0x10,%rdx pxor %xmm4,%xmm0 | pxor %xmm2,%xmm0 pxor %xmm5,%xmm1 | pxor %xmm3,%xmm1 aesenc %xmm0,%xmm0 aesenc %xmm0,%xmm0 aesenc %xmm1,%xmm1 aesenc %xmm1,%xmm1 aesenc %xmm0,%xmm0 aesenc %xmm0,%xmm0 aesenc %xmm1,%xmm1 aesenc %xmm1,%xmm1 The number of instructions is identical, but there are actually 2 more uops per iteration. LLVM-MCA simulation shows this should execute in the same number of cycles on older CPUs that do not have support for VAES (see ). For the VAES version in aeshash256() and the AVX10 version in aeshash256_256(): QStringView QLatin1StringView vpxor -0x40(%rax),%ymm1,%ym | vpmovzxbw -0x20(%rax),%ymm3 vpxor -0x20(%rax),%ymm0,%ym | vpmovzxbw -0x10(%rax),%ymm2 add $0x40,%rax | add $0x20,%rax | vpxor %ymm3,%ymm0,%ymm0 | vpxor %ymm2,%ymm1,%ymm1 vaesenc %ymm1,%ymm1,%ymm1 < vaesenc %ymm0,%ymm0,%ymm0 vaesenc %ymm0,%ymm0,%ymm0 vaesenc %ymm1,%ymm1,%ymm1 vaesenc %ymm1,%ymm1,%ymm1 vaesenc %ymm0,%ymm0,%ymm0 vaesenc %ymm0,%ymm0,%ymm0 > vaesenc %ymm1,%ymm1,%ymm1 In this case, the increase in number of instructions matches the increase in number of uops. The LLVM-MCA simulation says that the QLatin1StringView version is faster at 11 cycles/iteration vs 14 cyc/it (see ), but that can't be right. Measured performance of CPU cycles, on an Intel Core i9-7940X (Skylake, no VAES support), normalized on the QString performance (QByteArray is used as a stand-in for the performance in Qt 6.7): aeshash | siphash QByteArray QL1SV QString QByteArray QString dictionary 94.5% 79.7% 100.0% 150.5%* 159.8% paths-small 90.2% 93.2% 100.0% 202.8% 290.3% uuids 81.8% 100.7% 100.0% 215.2% 350.7% longstrings 42.5% 100.8% 100.0% 185.7% 353.2% numbers 95.5% 77.9% 100.0% 155.3%* 164.5% On an Intel Core i7-1165G7 (Tiger Lake, capable of VAES and AVX512VL): aeshash | siphash QByteArray QL1SV QString QByteArray QString dictionary 90.0% 91.1% 100.0% 103.3%* 157.1% paths-small 99.4% 104.8% 100.0% 237.5% 358.0% uuids 88.5% 117.6% 100.0% 274.5% 461.7% longstrings 57.4% 111.2% 100.0% 503.0% 974.3% numbers 90.6% 89.7% 100.0% 98.7%* 149.9% On an Intel 4th Generation Xeon Scalable Platinum (Sapphire Rapids, same Golden Cove core as Alder Lake): aeshash | siphash QByteArray QL1SV QString QByteArray QString dictionary 89.9% 102.1% 100.0% 158.1%* 172.7% paths-small 78.0% 89.4% 100.0% 159.4% 258.0% uuids 109.1% 107.9% 100.0% 279.0% 496.3% longstrings 52.1% 112.4% 100.0% 564.4% 1078.3% numbers 85.8% 98.9% 100.0% 152.6%* 190.4% * dictionary contains very short entries (6 characters) * paths-small contains strings of varying length, but very few over 32 * uuids-list contains fixed-length strings (38 characters) * longstrings is the same but 304 characters * numbers also a lot contains very short strings (1 to 6 chars) What this shows: * For short strings, the performance difference is negligible between all three * For longer strings, QLatin1StringView now costs between 7 and 17% more than QString on the tested machines instead of up to ~50% less, except on the older machine (where I think the main QString hashing is suffering from memory bandwidth limitations) * The AES hash implementation is anywhere from 1.6 to 11x faster than Siphash * Murmurhash (marked with asterisk) is much faster than Siphash, but it only managed to beat the AES hash in one test Change-Id: I664b9f014ffc48cbb49bfffd17b045c1811ac0ed Reviewed-by: Qt CI Bot Reviewed-by: Mårten Nordheim --- src/corelib/tools/qhash.cpp | 174 +++++++++++++----- .../qhashfunctions/tst_qhashfunctions.cpp | 6 +- .../corelib/tools/qhash/outofline.cpp | 4 +- .../corelib/tools/qhash/tst_bench_qhash.cpp | 42 ++++- .../corelib/tools/qhash/tst_bench_qhash.h | 16 +- 5 files changed, 178 insertions(+), 64 deletions(-) diff --git a/src/corelib/tools/qhash.cpp b/src/corelib/tools/qhash.cpp index 591a1ca1c6c..80771417955 100644 --- a/src/corelib/tools/qhash.cpp +++ b/src/corelib/tools/qhash.cpp @@ -616,8 +616,39 @@ namespace { // the scrambling round (step 3 in [1]) because it's just very good at // spreading the bits around. // + // Note on Latin-1 hashing (ZX == ByteToWord): for simplicity of the + // algorithm, we pass sizes equivalent to the UTF-16 content (ZX == None). + // That means we must multiply by 2 on entry, divide by 2 on pointer + // advancing, and load half as much data from memory (though we produce + // exactly as much data in registers). The compilers appear to optimize + // this out. + // // [1] https://en.wikipedia.org/wiki/Advanced_Encryption_Standard#High-level_description_of_the_algorithm + template static const T *advance(const T *ptr, ptrdiff_t n) + { + if constexpr (ZX == None) + return ptr + n; + + // see note above on ZX == ByteToWord hashing + auto p = reinterpret_cast(ptr); + n *= sizeof(T); + return reinterpret_cast(p + n/2); + } + + template static __m128i loadu128(const void *ptr); + template <> Q_ALWAYS_INLINE QT_FUNCTION_TARGET(AES) __m128i loadu128(const void *ptr) + { + return _mm_loadu_si128(reinterpret_cast(ptr)); + } + template <> Q_ALWAYS_INLINE QT_FUNCTION_TARGET(AES) __m128i loadu128(const void *ptr) + { + // use a MOVQ followed by PMOVZXBW + // the compiler usually combines them as a single, loading PMOVZXBW + __m128i data = _mm_loadl_epi64(static_cast(ptr)); + return _mm_cvtepu8_epi16(data); + } + // hash 16 bytes, running 3 scramble rounds of AES on itself (like label "final1") static void Q_ALWAYS_INLINE QT_FUNCTION_TARGET(AES) QT_VECTORCALL hash16bytes(__m128i &state0, __m128i data) @@ -629,11 +660,12 @@ namespace { } // hash twice 16 bytes, running 2 scramble rounds of AES on itself + template static void QT_FUNCTION_TARGET(AES) QT_VECTORCALL hash2x16bytes(__m128i &state0, __m128i &state1, const __m128i *src0, const __m128i *src1) { - __m128i data0 = _mm_loadu_si128(src0); - __m128i data1 = _mm_loadu_si128(src1); + __m128i data0 = loadu128(src0); + __m128i data1 = loadu128(src1); state0 = _mm_xor_si128(data0, state0); state1 = _mm_xor_si128(data1, state1); state0 = _mm_aesenc_si128(state0, state0); @@ -680,16 +712,18 @@ Q_ALWAYS_INLINE __m128i AESHashSeed::state1() const } } +template static size_t QT_FUNCTION_TARGET(AES) QT_VECTORCALL aeshash128_16to32(__m128i state0, __m128i state1, const __m128i *src, const __m128i *srcend) { { - if (src + 1 < srcend) { + const __m128i *src2 = advance(srcend, -1); + if (advance(src, 1) < srcend) { // epilogue: between 16 and 31 bytes - hash2x16bytes(state0, state1, src, srcend - 1); + hash2x16bytes(state0, state1, src, src2); } else if (src != srcend) { // epilogue: between 1 and 16 bytes, overlap with the end - __m128i data = _mm_loadu_si128(srcend - 1); + __m128i data = loadu128(src2); hash16bytes(state0, data); } @@ -700,8 +734,21 @@ aeshash128_16to32(__m128i state0, __m128i state1, const __m128i *src, const __m1 return mm_cvtsi128_sz(state0); } +// load all 16 bytes and mask off the bytes past the end of the source +static const qint8 maskarray[] = { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +// load 16 bytes ending at the data end, then shuffle them to the beginning +static const qint8 shufflecontrol[] = { + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 +}; + +template static size_t QT_FUNCTION_TARGET(AES) QT_VECTORCALL -aeshash128_lt16(__m128i state0, const uchar *p, size_t len) +aeshash128_lt16(__m128i state0, const __m128i *src, const __m128i *srcend, size_t len) { if (len) { // We're going to load 16 bytes and mask zero the part we don't care @@ -712,25 +759,15 @@ aeshash128_lt16(__m128i state0, const uchar *p, size_t len) constexpr quintptr PageSize = 4096; __m128i data; - if ((quintptr(p) & (PageSize / 2)) == 0) { + if ((quintptr(src) & (PageSize / 2)) == 0) { // lower half of the page: - // load all 16 bytes and mask off the bytes past the end of the source - static const qint8 maskarray[] = { - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - }; __m128i mask = _mm_loadu_si128(reinterpret_cast(maskarray + 15 - len)); - data = _mm_loadu_si128(reinterpret_cast(p)); + data = loadu128(src); data = _mm_and_si128(data, mask); } else { // upper half of the page: - // load 16 bytes ending at the data end, then shuffle them to the beginning - static const qint8 shufflecontrol[] = { - 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 - }; __m128i control = _mm_loadu_si128(reinterpret_cast(shufflecontrol + 15 - len)); - data = _mm_loadu_si128(reinterpret_cast(p + len) - 1); + data = loadu128(advance(srcend, -1)); data = _mm_shuffle_epi8(data, control); } @@ -739,24 +776,45 @@ aeshash128_lt16(__m128i state0, const uchar *p, size_t len) return mm_cvtsi128_sz(state0); } +template static size_t QT_FUNCTION_TARGET(AES) QT_VECTORCALL aeshash128_ge32(__m128i state0, __m128i state1, const __m128i *src, const __m128i *srcend) { // main loop: scramble two 16-byte blocks - for ( ; src + 2 < srcend; src += 2) - hash2x16bytes(state0, state1, src, src + 1); + for ( ; advance(src, 2) < srcend; src = advance(src, 2)) + hash2x16bytes(state0, state1, src, advance(src, 1)); - return aeshash128_16to32(state0, state1, src, srcend); + return aeshash128_16to32(state0, state1, src, srcend); } # if QT_COMPILER_SUPPORTS_HERE(VAES) +template static __m256i loadu256(const void *ptr); +template <> Q_ALWAYS_INLINE QT_FUNCTION_TARGET(VAES) __m256i loadu256(const void *ptr) +{ + return _mm256_loadu_si256(reinterpret_cast(ptr)); +} +template <> Q_ALWAYS_INLINE QT_FUNCTION_TARGET(VAES) __m256i loadu256(const void *ptr) +{ + // VPMOVZXBW xmm, ymm + __m128i data = _mm_loadu_si128(reinterpret_cast(ptr)); + return _mm256_cvtepu8_epi16(data); +} + +template static size_t QT_FUNCTION_TARGET(VAES_AVX512) QT_VECTORCALL aeshash256_lt32_avx256(__m256i state0, const uchar *p, size_t len) { __m128i state0_128 = _mm256_castsi256_si128(state0); if (len) { - __mmask32 mask = _bzhi_u32(-1, unsigned(len)); - __m256i data = _mm256_maskz_loadu_epi8(mask, p); + __m256i data; + if constexpr (ZX == None) { + __mmask32 mask = _bzhi_u32(-1, unsigned(len)); + data = _mm256_maskz_loadu_epi8(mask, p); + } else { + __mmask16 mask = _bzhi_u32(-1, unsigned(len) / 2); + __m128i data0 = _mm_maskz_loadu_epi8(mask, p); + data = _mm256_cvtepu8_epi16(data0); + } __m128i data0 = _mm256_castsi256_si128(data); if (len >= sizeof(__m128i)) { state0 = _mm256_xor_si256(state0, data); @@ -776,8 +834,9 @@ aeshash256_lt32_avx256(__m256i state0, const uchar *p, size_t len) return mm_cvtsi128_sz(state0_128); } +template static size_t QT_FUNCTION_TARGET(VAES) QT_VECTORCALL -aeshash256_ge32(__m256i state0, const uchar *p, size_t len) +aeshash256_ge32(__m256i state0, const __m128i *s, const __m128i *end, size_t len) { static const auto hash32bytes = [](__m256i &state0, __m256i data) QT_FUNCTION_TARGET(VAES) { state0 = _mm256_xor_si256(state0, data); @@ -787,10 +846,10 @@ aeshash256_ge32(__m256i state0, const uchar *p, size_t len) }; // hash twice 32 bytes, running 2 scramble rounds of AES on itself - const auto hash2x32bytes = [](__m256i &state0, __m256i &state1, const __m256i *src0, - const __m256i *src1) QT_FUNCTION_TARGET(VAES) { - __m256i data0 = _mm256_loadu_si256(src0); - __m256i data1 = _mm256_loadu_si256(src1); + const auto hash2x32bytes = [](__m256i &state0, __m256i &state1, const void *src0, + const void *src1) QT_FUNCTION_TARGET(VAES) { + __m256i data0 = loadu256(src0); + __m256i data1 = loadu256(src1); state0 = _mm256_xor_si256(data0, state0); state1 = _mm256_xor_si256(data1, state1); state0 = _mm256_aesenc_epi128(state0, state0); @@ -799,21 +858,22 @@ aeshash256_ge32(__m256i state0, const uchar *p, size_t len) state1 = _mm256_aesenc_epi128(state1, state1); }; - const __m256i *src = reinterpret_cast(p); - const __m256i *srcend = reinterpret_cast(p + len); + const __m256i *src = reinterpret_cast(s); + const __m256i *srcend = reinterpret_cast(end); __m256i state1 = _mm256_aesenc_epi128(state0, mm256_set1_epz(len)); // main loop: scramble two 32-byte blocks - for ( ; src + 2 < srcend; src += 2) - hash2x32bytes(state0, state1, src, src + 1); + for ( ; advance(src, 2) < srcend; src = advance(src, 2)) + hash2x32bytes(state0, state1, src, advance(src, 1)); - if (src + 1 < srcend) { + const __m256i *src2 = advance(srcend, -1); + if (advance(src, 1) < srcend) { // epilogue: between 32 and 31 bytes - hash2x32bytes(state0, state1, src, srcend - 1); + hash2x32bytes(state0, state1, src, src2); } else if (src != srcend) { // epilogue: between 1 and 32 bytes, overlap with the end - __m256i data = _mm256_loadu_si256(srcend - 1); + __m256i data = loadu256(src2); hash32bytes(state0, data); } @@ -826,59 +886,69 @@ aeshash256_ge32(__m256i state0, const uchar *p, size_t len) return mm_cvtsi128_sz(_mm_xor_si128(low, high)); } +template static size_t QT_FUNCTION_TARGET(VAES) aeshash256(const uchar *p, size_t len, size_t seed, size_t seed2) noexcept { AESHashSeed state(seed, seed2); auto src = reinterpret_cast(p); - const auto srcend = reinterpret_cast(p + len); + const auto srcend = reinterpret_cast(advance(p, len)); if (len < sizeof(__m128i)) - return aeshash128_lt16(state.state0, p, len); + return aeshash128_lt16(state.state0, src, srcend, len); if (len <= sizeof(__m256i)) - return aeshash128_16to32(state.state0, state.state1(), src, srcend); + return aeshash128_16to32(state.state0, state.state1(), src, srcend); - return aeshash256_ge32(state.state0_256(), p, len); + return aeshash256_ge32(state.state0_256(), src, srcend, len); } +template static size_t QT_FUNCTION_TARGET(VAES_AVX512) aeshash256_avx256(const uchar *p, size_t len, size_t seed, size_t seed2) noexcept { AESHashSeed state(seed, seed2); - if (len <= sizeof(__m256i)) - return aeshash256_lt32_avx256(state.state0_256(), p, len); + auto src = reinterpret_cast(p); + const auto srcend = reinterpret_cast(advance(p, len)); - return aeshash256_ge32(state.state0_256(), p, len); + if (len <= sizeof(__m256i)) + return aeshash256_lt32_avx256(state.state0_256(), p, len); + + return aeshash256_ge32(state.state0_256(), src, srcend, len); } # endif // VAES +template static size_t QT_FUNCTION_TARGET(AES) aeshash128(const uchar *p, size_t len, size_t seed, size_t seed2) noexcept { AESHashSeed state(seed, seed2); auto src = reinterpret_cast(p); - const auto srcend = reinterpret_cast(p + len); + const auto srcend = reinterpret_cast(advance(p, len)); if (len < sizeof(__m128i)) - return aeshash128_lt16(state.state0, p, len); + return aeshash128_lt16(state.state0, src, srcend, len); if (len <= sizeof(__m256i)) - return aeshash128_16to32(state.state0, state.state1(), src, srcend); + return aeshash128_16to32(state.state0, state.state1(), src, srcend); - return aeshash128_ge32(state.state0, state.state1(), src, srcend); + return aeshash128_ge32(state.state0, state.state1(), src, srcend); } +template static size_t aeshash(const uchar *p, size_t len, size_t seed, size_t seed2) noexcept { + if constexpr (ZX == ByteToWord) + len *= 2; // see note above on ZX == ByteToWord hashing + # if QT_COMPILER_SUPPORTS_HERE(VAES) if (qCpuHasFeature(VAES)) { if (qCpuHasFeature(AVX512VL)) - return aeshash256_avx256(p, len, seed, seed2); - return aeshash256(p, len, seed, seed2); + return aeshash256_avx256(p, len, seed, seed2); + return aeshash256(p, len, seed, seed2); } # endif - return aeshash128(p, len, seed, seed2); + return aeshash128(p, len, seed, seed2); } #endif // x86 AESNI @@ -1090,6 +1160,10 @@ size_t qHash(QLatin1StringView key, size_t seed) noexcept if (seed) seed2 = qt_qhash_seed.currentSeed(1); +#if defined(AESHASH) + if (seed && qCpuHasFeature(AES) && qCpuHasFeature(SSE4_2)) + return aeshash(data, size, seed, seed2); +#endif return qHashBits_fallback(data, size, seed, seed2); } diff --git a/tests/auto/corelib/tools/qhashfunctions/tst_qhashfunctions.cpp b/tests/auto/corelib/tools/qhashfunctions/tst_qhashfunctions.cpp index fdb2b373469..06f18dfe9c2 100644 --- a/tests/auto/corelib/tools/qhashfunctions/tst_qhashfunctions.cpp +++ b/tests/auto/corelib/tools/qhashfunctions/tst_qhashfunctions.cpp @@ -289,10 +289,12 @@ void tst_QHashFunctions::stringConsistency_data() QTest::newRow("null") << QString(); QTest::newRow("empty") << ""; QTest::newRow("withnull") << QStringLiteral("A\0z"); - QTest::newRow("short-ascii") << "Hello"; + QTest::newRow("short-ascii") << "Hello"; // 10 bytes + QTest::newRow("medium-ascii") << "Hello, World"; // 24 bytes QTest::newRow("long-ascii") << QStringLiteral("abcdefghijklmnopqrstuvxyz").repeated(16); QTest::newRow("short-latin1") << "Bokmål"; + QTest::newRow("medium-latin1") << "Det går bra!"; // 24 bytes QTest::newRow("long-latin1") << R"(Alle mennesker er født frie og med samme menneskeverd og menneskerettigheter. De er utstyrt med fornuft og samvittighet og bør handle mot hverandre i brorskapets ånd.)"; @@ -327,8 +329,6 @@ void tst_QHashFunctions::stringConsistency() QLatin1StringView l1sv(l1ba.data(), l1ba.size()); #ifdef Q_PROCESSOR_ARM // zero-extending aeshash not implemented on ARM -#elif defined(Q_PROCESSOR_X86) - // zero-extending aeshash not implemented on x86 #else if (value == l1sv) QCOMPARE(qHash(l1sv, seed), qHash(value, seed)); diff --git a/tests/benchmarks/corelib/tools/qhash/outofline.cpp b/tests/benchmarks/corelib/tools/qhash/outofline.cpp index f44746c5b1b..5b16c36ffbb 100644 --- a/tests/benchmarks/corelib/tools/qhash/outofline.cpp +++ b/tests/benchmarks/corelib/tools/qhash/outofline.cpp @@ -5,7 +5,7 @@ QT_BEGIN_NAMESPACE -size_t qHash(const Qt4String &str) +size_t qHash(const Qt4String &str, size_t /* never used */) { qsizetype n = str.size(); const QChar *p = str.unicode(); @@ -40,7 +40,7 @@ size_t qHash(const Qt50String &key, size_t seed) // Still, we can avoid writing the multiplication as "(h << 5) - h" // -- the compiler will turn it into a shift and an addition anyway // (for instance, gcc 4.4 does that even at -O0). -size_t qHash(const JavaString &str) +size_t qHash(const JavaString &str, size_t /* never used */) { const auto *p = reinterpret_cast(str.constData()); const qsizetype len = str.size(); diff --git a/tests/benchmarks/corelib/tools/qhash/tst_bench_qhash.cpp b/tests/benchmarks/corelib/tools/qhash/tst_bench_qhash.cpp index f6214dfa360..1a62a484372 100644 --- a/tests/benchmarks/corelib/tools/qhash/tst_bench_qhash.cpp +++ b/tests/benchmarks/corelib/tools/qhash/tst_bench_qhash.cpp @@ -13,6 +13,8 @@ #include #include +static constexpr quint64 RandomSeed32 = 1045982819; +static constexpr quint64 RandomSeed64 = QtPrivate::QHashCombine{}(RandomSeed32, RandomSeed32); class tst_QHash : public QObject { @@ -31,6 +33,8 @@ private slots: void hashing_current_data() { data(); } void hashing_current() { hashing_template(); } + void hashing_qbytearray_data() { data(); } + void hashing_qbytearray() { hashing_template(); } void hashing_qt50_data() { data(); } void hashing_qt50() { hashing_template(); } void hashing_qt4_data() { data(); } @@ -38,15 +42,25 @@ private slots: void hashing_javaString_data() { data(); } void hashing_javaString() { hashing_template(); } + void hashing_nonzero_current_data() { data(); } + void hashing_nonzero_current() { hashing_nonzero_template(); } + void hashing_nonzero_qbytearray_data() { data(); } + void hashing_nonzero_qbytearray() { hashing_nonzero_template(); } + void hashing_nonzero_qlatin1string_data() { data(); } + void hashing_nonzero_qlatin1string() { hashing_nonzero_template(); } + private: void data(); template void qhash_template(); - template void hashing_template(); + template void hashing_template(); + template void hashing_nonzero_template() + { hashing_template(); } QStringList smallFilePaths; QStringList uuids; QStringList dict; QStringList numbers; + QStringList longstrings; }; ///////////////////// QHash ///////////////////// @@ -68,10 +82,12 @@ void tst_QHash::initTestCase() // guaranteed to be completely random, generated by http://xkcd.com/221/ QUuid ns = QUuid("{f43d2ef3-2fe9-4563-a6f5-5a0100c2d699}"); uuids.reserve(smallFilePaths.size()); + longstrings.reserve(smallFilePaths.size()); foreach (const QString &path, smallFilePaths) uuids.append(QUuid::createUuidV5(ns, path).toString()); - + for (qsizetype i = 0; i < uuids.size(); ++i) + longstrings.append(uuids.at(i).repeated(8)); // lots of strings with alphabetical characters, vaguely reminiscent of // a dictionary. @@ -112,6 +128,7 @@ void tst_QHash::data() QTest::addColumn("items"); QTest::newRow("paths-small") << smallFilePaths; QTest::newRow("uuids-list") << uuids; + QTest::newRow("longstrings-list") << longstrings; QTest::newRow("dictionary") << dict; QTest::newRow("numbers") << numbers; } @@ -132,19 +149,30 @@ template void tst_QHash::qhash_template() } } -template void tst_QHash::hashing_template() +template void tst_QHash::hashing_template() { // just the hashing function QFETCH(QStringList, items); QList realitems; realitems.reserve(items.size()); - foreach (const QString &s, items) - realitems.append(s); + foreach (const QString &s, items) { + if constexpr (std::is_same_v) { + realitems.append(s); + } else if constexpr (sizeof(typename String::value_type) == 1) { + realitems.append(String(s.toLatin1())); + } + } QBENCHMARK { - for (int i = 0, n = realitems.size(); i != n; ++i) - (void)qHash(realitems.at(i)); + for (int i = 0, n = realitems.size(); i != n; ++i) { + volatile size_t h = qHash(realitems.at(i), Seed); + (void)h; +#ifdef Q_CC_GNU + // "use" h + asm ("" : "+r" (h)); +#endif + } } } diff --git a/tests/benchmarks/corelib/tools/qhash/tst_bench_qhash.h b/tests/benchmarks/corelib/tools/qhash/tst_bench_qhash.h index 7d89f044c4e..501b4a8b7f1 100644 --- a/tests/benchmarks/corelib/tools/qhash/tst_bench_qhash.h +++ b/tests/benchmarks/corelib/tools/qhash/tst_bench_qhash.h @@ -1,8 +1,20 @@ // Copyright (C) 2016 The Qt Company Ltd. // SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only +#include #include +struct OwningLatin1String : QByteArray +{ + OwningLatin1String() = default; + OwningLatin1String(const QByteArray &a) : QByteArray(a) {} + OwningLatin1String(QByteArray &&a) : QByteArray(std::move(a)) {} +}; +QT_BEGIN_NAMESPACE +inline size_t qHash(const OwningLatin1String &s, size_t seed = 0) +{ return qHash(QLatin1StringView(s), seed); } +QT_END_NAMESPACE + struct Qt4String : QString { Qt4String() {} @@ -10,7 +22,7 @@ struct Qt4String : QString }; QT_BEGIN_NAMESPACE -size_t qHash(const Qt4String &); +size_t qHash(const Qt4String &, size_t = 0); QT_END_NAMESPACE struct Qt50String : QString @@ -31,6 +43,6 @@ struct JavaString : QString }; QT_BEGIN_NAMESPACE -size_t qHash(const JavaString &); +size_t qHash(const JavaString &, size_t = 0); QT_END_NAMESPACE