qHash: implement chunked hashing of QLatin1StringView
So that it hashes to the same value as QString{,View}. In order to test this, you must either run on a CPU other than ARM and x86, or disable the AES hasher. I did that and can confirm siphash and murmurhash do work with on-the-fly conversion from Latin-1. Change-Id: I664b9f014ffc48cbb49bfffd17b03e5e62ec4e89 Reviewed-by: Mårten Nordheim <marten.nordheim@qt.io>
This commit is contained in:
parent
9a2e21174a
commit
970aad5418
@ -49,6 +49,8 @@
|
||||
|
||||
QT_BEGIN_NAMESPACE
|
||||
|
||||
void qt_from_latin1(char16_t *dst, const char *str, size_t size) noexcept; // qstring.cpp
|
||||
|
||||
// We assume that pointers and size_t have the same size. If that assumption should fail
|
||||
// on a platform the code selecting the different methods below needs to be fixed.
|
||||
static_assert(sizeof(size_t) == QT_POINTER_SIZE, "size_t and pointers have different size.");
|
||||
@ -523,6 +525,52 @@ static size_t siphash(const uint8_t *in, size_t inlen, size_t seed, size_t seed2
|
||||
return hasher.finalize(in + (inlen & ~TailSizeMask), inlen & TailSizeMask);
|
||||
}
|
||||
|
||||
// Selects how qHashBits_fallback() treats its input bytes before hashing.
// Kept as a plain (unscoped) enum because the enumerators are used
// unqualified as template arguments.
enum ZeroExtension {
    None = 0,       // hash the input bytes exactly as they are
    ByteToWord = 1, // widen every input byte to a 16-bit word, then hash
};
|
||||
|
||||
template <ZeroExtension = None> static size_t
|
||||
qHashBits_fallback(const uchar *p, size_t size, size_t seed, size_t seed2) noexcept;
|
||||
template <> size_t qHashBits_fallback<None>(const uchar *p, size_t size, size_t seed, size_t seed2) noexcept
|
||||
{
|
||||
if (size <= QT_POINTER_SIZE)
|
||||
return murmurhash(p, size, seed);
|
||||
|
||||
return siphash(reinterpret_cast<const uchar *>(p), size, seed, seed2);
|
||||
}
|
||||
|
||||
// Byte-to-word zero-extension: hashes the \a size bytes at \a data as if each
// byte had first been widened to one char16_t, i.e. the hashers are fed
// size * 2 bytes. The widening is done on the fly through a fixed-size stack
// buffer, so no heap allocation is needed.
template <> size_t qHashBits_fallback<ByteToWord>(const uchar *data, size_t size, size_t seed, size_t seed2) noexcept
{
    // Quick, "inlined" widening for very short runs: copies each byte into
    // one char16_t.
    const auto widenShort = [](char16_t *dest, const uchar *src, size_t count) {
        std::copy_n(src, count, dest);
    };

    // Widened input fits in QT_POINTER_SIZE bytes: same murmurhash path the
    // non-extending fallback takes for inputs of that byte size.
    if (size <= QT_POINTER_SIZE / 2) {
        std::array<char16_t, QT_POINTER_SIZE / 2> shortBuf;
        widenShort(shortBuf.data(), data, size);
        return murmurhash(shortBuf.data(), size * 2, seed);
    }

    // Mask, in input characters, of the part that does not fill a whole
    // pointer-sized word once widened to two bytes per character.
    constexpr size_t TailSizeMask = sizeof(void *) / 2 - 1;

    std::array<char16_t, 256> chunk;
    const auto latin1 = reinterpret_cast<const char *>(data);
    const auto chunkBytes = reinterpret_cast<uint8_t *>(chunk.data());

    SipHash<> hasher(size * 2, seed, seed2);

    // Full chunks. The comparison is strict, so the final chunk (even when it
    // is exactly chunk.size() characters) is left for the code below.
    size_t consumed = 0;
    while (consumed + chunk.size() < size) {
        // qt_from_latin1() is defined in qstring.cpp; presumably it performs
        // the same byte -> char16_t widening as widenShort, optimised for
        // longer runs.
        qt_from_latin1(chunk.data(), latin1 + consumed, chunk.size());
        hasher.addBlock(chunkBytes, sizeof(chunk));
        consumed += chunk.size();
    }

    // Whatever is left that still makes up whole pointer-sized words.
    const size_t remaining = size - consumed;
    if (remaining > TailSizeMask) {
        const size_t wholeWords = remaining & ~TailSizeMask;
        qt_from_latin1(chunk.data(), latin1 + consumed, wholeWords);
        hasher.addBlock(chunkBytes, wholeWords * 2);
        consumed += wholeWords;
    }

    // Final partial word (possibly empty), handed to finalize().
    const size_t tail = size - consumed;
    widenShort(chunk.data(), data + consumed, tail);
    return hasher.finalize(chunkBytes, tail * 2);
}
|
||||
|
||||
#if defined(__SANITIZE_ADDRESS__) || defined(__SANITIZE_THREAD__) // GCC
|
||||
# define QHASH_AES_SANITIZER_BUILD
|
||||
#elif __has_feature(address_sanitizer) || __has_feature(thread_sanitizer) // Clang
|
||||
@ -978,18 +1026,17 @@ size_t qHashBits(const void *p, size_t size, size_t seed) noexcept
|
||||
size_t seed2 = size;
|
||||
if (seed)
|
||||
seed2 = qt_qhash_seed.currentSeed(1);
|
||||
|
||||
auto data = reinterpret_cast<const uchar *>(p);
|
||||
#ifdef AESHASH
|
||||
if (seed && qCpuHasFeature(AES) && qCpuHasFeature(SSE4_2))
|
||||
return aeshash(reinterpret_cast<const uchar *>(p), size, seed, seed2);
|
||||
return aeshash(data, size, seed, seed2);
|
||||
#elif defined(Q_PROCESSOR_ARM) && QT_COMPILER_SUPPORTS_HERE(AES) && !defined(QHASH_AES_SANITIZER_BUILD) && !defined(QT_BOOTSTRAPPED)
|
||||
if (seed && qCpuHasFeature(AES))
|
||||
return aeshash(reinterpret_cast<const uchar *>(p), size, seed, seed2);
|
||||
return aeshash(data, size, seed, seed2);
|
||||
#endif
|
||||
|
||||
if (size <= QT_POINTER_SIZE)
|
||||
return murmurhash(p, size, seed);
|
||||
|
||||
return siphash(reinterpret_cast<const uchar *>(p), size, seed, seed2);
|
||||
return qHashBits_fallback<>(data, size, seed, seed2);
|
||||
}
|
||||
|
||||
size_t qHash(QByteArrayView key, size_t seed) noexcept
|
||||
@ -1019,7 +1066,31 @@ size_t qHash(const QBitArray &bitArray, size_t seed) noexcept
|
||||
|
||||
// Hashes the Latin-1 view \a key with \a seed.
//
// With a zero seed in Qt 6 (and always in bootstrapped builds) the raw
// Latin-1 bytes are hashed with qHashBits(), preserving the values produced
// before Qt 6.8. Otherwise the bytes are zero-extended to 16-bit words on the
// fly and hashed with qHashBits_fallback<ByteToWord>().
//
// NOTE(review): this path never dispatches to an AES-based hasher, so the
// result can only match the QString hash on configurations where qHashBits()
// also ends up in the murmurhash/siphash fallback -- confirm against callers
// that rely on cross-type lookups.
size_t qHash(QLatin1StringView key, size_t seed) noexcept
{
#ifdef QT_BOOTSTRAPPED
    // the seed is always 0 in bootstrapped mode (no seed generation code),
    // so help the compiler do dead code elimination
    seed = 0;
    constexpr bool Qt6DeterministicHash = true;
#else
    constexpr bool Qt6DeterministicHash = QT_VERSION_MAJOR == 6;
#endif

    auto data = reinterpret_cast<const uchar *>(key.data());
    size_t size = size_t(key.size());

    if (seed == 0 && Qt6DeterministicHash) {
        // fall back to what we used to use prior to Qt 6.8
        return qHashBits(data, size, seed);
    }

    // mix in the length as a secondary seed. For seed == 0, seed2 must be
    // size, to match what we used to do prior to Qt 6.2.
    // Multiplied by 2 to match the byte size of the equivalent UTF-16 string.
    size_t seed2 = size * 2;
    if (seed)
        seed2 = qt_qhash_seed.currentSeed(1);

    return qHashBits_fallback<ByteToWord>(data, size, seed, seed2);
}
|
||||
|
||||
/*!
|
||||
|
@ -42,6 +42,7 @@ private slots:
|
||||
void heterogeneousSearchConstKey();
|
||||
void heterogeneousSearchByteArray();
|
||||
void heterogeneousSearchString();
|
||||
void heterogeneousSearchLatin1String();
|
||||
|
||||
void rehash_isnt_quadratic();
|
||||
void dont_need_default_constructor();
|
||||
@ -54,6 +55,7 @@ private slots:
|
||||
void qmultihashHeterogeneousSearchConstKey();
|
||||
void qmultihashHeterogeneousSearchByteArray();
|
||||
void qmultihashHeterogeneousSearchString();
|
||||
void qmultihashHeterogeneousSearchLatin1String();
|
||||
|
||||
void compare();
|
||||
void compare2();
|
||||
@ -1221,15 +1223,17 @@ template <> struct HeterogeneousSearchTestHelper<HeterogeneousHashingType>
|
||||
using HeterogeneousHashingType = QString;
|
||||
#endif
|
||||
|
||||
template <template <typename, typename> class Hash, typename String, typename View>
|
||||
static void heterogeneousSearchTest(const QList<std::remove_const_t<String>> &keys)
|
||||
template <template <typename, typename> class Hash, typename String, typename View, typename Converter>
|
||||
static void heterogeneousSearchTest(const QList<std::remove_const_t<String>> &keys, Converter conv)
|
||||
{
|
||||
#ifdef __cpp_concepts
|
||||
using Helper = HeterogeneousSearchTestHelper<View>;
|
||||
String key = keys.last();
|
||||
String otherKey = keys.first();
|
||||
View keyView(key);
|
||||
View otherKeyView(otherKey);
|
||||
auto keyHolder = conv(key);
|
||||
auto otherKeyHolder = conv(otherKey);
|
||||
View keyView(keyHolder);
|
||||
View otherKeyView(otherKeyHolder);
|
||||
|
||||
Hash<String, qsizetype> hash;
|
||||
static constexpr bool IsMultiHash = !std::is_same_v<decltype(hash.remove(String())), bool>;
|
||||
@ -1332,10 +1336,29 @@ static void heterogeneousSearchTest(const QList<std::remove_const_t<String>> &ke
|
||||
Helper::checkCounter();
|
||||
#else
|
||||
Q_UNUSED(keys);
|
||||
Q_UNUSED(conv);
|
||||
QSKIP("This feature requires C++20 (concepts)");
|
||||
#endif
|
||||
}
|
||||
|
||||
template <template <typename, typename> class Hash, typename String, typename View>
|
||||
static void heterogeneousSearchTest(const QList<std::remove_const_t<String>> &keys)
|
||||
{
|
||||
heterogeneousSearchTest<Hash, String, View>(keys, [](const String &s) { return View(s); });
|
||||
}
|
||||
|
||||
template <template <typename, typename> class Hash, typename T>
|
||||
static void heterogeneousSearchLatin1String(T)
|
||||
{
|
||||
if constexpr (!T::value) {
|
||||
QSKIP("QLatin1StringView and QString do not have the same hash on this platform");
|
||||
} else {
|
||||
// similar to the above
|
||||
auto toLatin1 = [](const QString &s) { return s.toLatin1(); };
|
||||
heterogeneousSearchTest<Hash, QString, QLatin1StringView>({ "Hello", {}, "World" }, toLatin1);
|
||||
}
|
||||
}
|
||||
|
||||
void tst_QHash::heterogeneousSearch()
|
||||
{
|
||||
heterogeneousSearchTest<QHash, QString, HeterogeneousHashingType>({ "Hello", {}, "World" });
|
||||
@ -1357,6 +1380,11 @@ void tst_QHash::heterogeneousSearchString()
|
||||
heterogeneousSearchTest<QHash, QString, QStringView>({ "Hello", {}, "World" });
|
||||
}
|
||||
|
||||
void tst_QHash::heterogeneousSearchLatin1String()
|
||||
{
|
||||
::heterogeneousSearchLatin1String<QHash>(QHashHeterogeneousSearch<QString, QLatin1StringView>{});
|
||||
}
|
||||
|
||||
void tst_QHash::compare()
|
||||
{
|
||||
QHash<int, QString> hash1,hash2;
|
||||
@ -2346,6 +2374,11 @@ void tst_QHash::qmultihashHeterogeneousSearchString()
|
||||
heterogeneousSearchTest<QMultiHash, QString, QStringView>({ "Hello", {}, "World" });
|
||||
}
|
||||
|
||||
void tst_QHash::qmultihashHeterogeneousSearchLatin1String()
|
||||
{
|
||||
::heterogeneousSearchLatin1String<QMultiHash>(QHashHeterogeneousSearch<QString, QLatin1StringView>{});
|
||||
}
|
||||
|
||||
void tst_QHash::keys_values_uniqueKeys()
|
||||
{
|
||||
QMultiHash<QString, int> hash;
|
||||
|
@ -321,6 +321,19 @@ void tst_QHashFunctions::stringConsistency()
|
||||
|
||||
QCOMPARE(qHash(sv, seed), qHash(value, seed));
|
||||
QCOMPARE(qHash(u8bav, seed), qHash(u8ba, seed));
|
||||
|
||||
if (seed || QT_VERSION_MAJOR > 6) {
|
||||
QByteArray l1ba = value.toLatin1();
|
||||
QLatin1StringView l1sv(l1ba.data(), l1ba.size());
|
||||
#ifdef Q_PROCESSOR_ARM
|
||||
// zero-extending aeshash not implemented on ARM
|
||||
#elif defined(Q_PROCESSOR_X86)
|
||||
// zero-extending aeshash not implemented on x86
|
||||
#else
|
||||
if (value == l1sv)
|
||||
QCOMPARE(qHash(l1sv, seed), qHash(value, seed));
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
void tst_QHashFunctions::qhash()
|
||||
|
Loading…
x
Reference in New Issue
Block a user