QHash: fix GrowthPolicy::bucketsForCapacity

It was confusing entry capacity with the bucket capacity. The value
maxNumBuckets() returned was the maximum number of entries. This issue
was harmless: we would just fail to cap the maximum to an allocatable
size. But the array new[] in the Data constructors would have capped the
maximum anyway (by way of throwing std::bad_alloc).

So instead of trying to calculate what the maximum bucket count is so we
can cap at that, simplify the calculation of the next power of 2 while
preventing it from overflowing in our calculations. We continue to rely
on new[] throwing when we return count that is larger than the maximum
allocatable.

This commit changes the load factor for QHashes containing exactly a
number of elements that is exactly a power of two. Previously, it would
be loaded at 50%, now it's at 25%. For this reason, tst_QSet::squeeze
needed to be fixed to depend less on the implementation details.

Pick-to: 6.5
Change-Id: I9671dee8ceb64aa9b9cafffd17415f3856c358a0
Reviewed-by: Mårten Nordheim <marten.nordheim@qt.io>
This commit is contained in:
Thiago Macieira 2023-02-06 15:13:51 -08:00
parent e836c4776f
commit 1d167b515e
4 changed files with 109 additions and 27 deletions

View File

@ -3918,4 +3918,36 @@ size_t qHash(long double key, size_t seed) noexcept
Returns the number of elements removed, if any.
*/
#ifdef QT_HAS_CONSTEXPR_BITOPS
namespace QHashPrivate {
static_assert(qPopulationCount(SpanConstants::NEntries) == 1,
"NEntries must be a power of 2 for bucketForHash() to work.");
// ensure the size of a Span does not depend on the template parameters
using Node1 = Node<int, int>;
static_assert(sizeof(Span<Node1>) == sizeof(Span<Node<char, void *>>));
static_assert(sizeof(Span<Node1>) == sizeof(Span<Node<qsizetype, QHashDummyValue>>));
static_assert(sizeof(Span<Node1>) == sizeof(Span<Node<QString, QVariant>>));
static_assert(sizeof(Span<Node1>) > SpanConstants::NEntries);
static_assert(qNextPowerOfTwo(sizeof(Span<Node1>)) == SpanConstants::NEntries * 2);
// ensure allocations are always a power of two, at a minimum NEntries,
// obeying the fomula
// qNextPowerOfTwo(2 * N);
// without overflowing
static constexpr size_t NEntries = SpanConstants::NEntries;
static_assert(GrowthPolicy::bucketsForCapacity(1) == NEntries);
static_assert(GrowthPolicy::bucketsForCapacity(NEntries / 2 + 0) == NEntries);
static_assert(GrowthPolicy::bucketsForCapacity(NEntries / 2 + 1) == 2 * NEntries);
static_assert(GrowthPolicy::bucketsForCapacity(NEntries * 1 - 1) == 2 * NEntries);
static_assert(GrowthPolicy::bucketsForCapacity(NEntries * 1 + 0) == 4 * NEntries);
static_assert(GrowthPolicy::bucketsForCapacity(NEntries * 1 + 1) == 4 * NEntries);
static_assert(GrowthPolicy::bucketsForCapacity(NEntries * 2 - 1) == 4 * NEntries);
static_assert(GrowthPolicy::bucketsForCapacity(NEntries * 2 + 0) == 8 * NEntries);
static_assert(GrowthPolicy::bucketsForCapacity(SIZE_MAX / 4) == SIZE_MAX / 2 + 1);
static_assert(GrowthPolicy::bucketsForCapacity(SIZE_MAX / 2) == SIZE_MAX);
static_assert(GrowthPolicy::bucketsForCapacity(SIZE_MAX) == SIZE_MAX);
}
#endif
QT_END_NAMESPACE

View File

@ -5,11 +5,11 @@
#ifndef QHASH_H
#define QHASH_H
#include <QtCore/qalgorithms.h>
#include <QtCore/qcontainertools_impl.h>
#include <QtCore/qhashfunctions.h>
#include <QtCore/qiterator.h>
#include <QtCore/qlist.h>
#include <QtCore/qmath.h>
#include <QtCore/qrefcount.h>
#include <initializer_list>
@ -414,28 +414,25 @@ struct Span {
// QHash uses a power of two growth policy.
namespace GrowthPolicy {
inline constexpr size_t maxNumBuckets() noexcept
{
// ensure the size of a Span does not depend on the template parameters
using Node1 = Node<int, int>;
using Node2 = Node<char, void *>;
using Node3 = Node<qsizetype, QHashDummyValue>;
static_assert(sizeof(Span<Node1>) == sizeof(Span<Node2>));
static_assert(sizeof(Span<Node1>) == sizeof(Span<Node3>));
// Maximum is 2^31-1 or 2^63-1 bytes (limited by qsizetype and ptrdiff_t)
size_t max = (std::numeric_limits<ptrdiff_t>::max)();
return max / sizeof(Span<Node1>) * SpanConstants::NEntries;
}
inline constexpr size_t bucketsForCapacity(size_t requestedCapacity) noexcept
{
constexpr int SizeDigits = std::numeric_limits<size_t>::digits;
// We want to use at minimum a full span (128 entries), so we hardcode it for any requested
// capacity <= 64. Any capacity above that gets rounded to a later power of two.
if (requestedCapacity <= 64)
return SpanConstants::NEntries;
if (requestedCapacity >= maxNumBuckets())
return maxNumBuckets();
return qNextPowerOfTwo(2 * requestedCapacity - 1);
// Same as
// qNextPowerOfTwo(2 * requestedCapacity);
//
// but ensuring neither our multiplication nor the function overflow.
// Additionally, the maximum memory allocation is 2^31-1 or 2^63-1 bytes
// (limited by qsizetype and ptrdiff_t).
int count = qCountLeadingZeroBits(requestedCapacity);
if (count < 2)
return (std::numeric_limits<size_t>::max)(); // will cause std::bad_alloc
return size_t(1) << (SizeDigits - count + 1);
}
inline constexpr size_t bucketForHash(size_t nBuckets, size_t hash) noexcept
{
@ -460,6 +457,11 @@ struct Data
size_t seed = 0;
Span *spans = nullptr;
static constexpr size_t maxNumBuckets() noexcept
{
return (std::numeric_limits<ptrdiff_t>::max)() / sizeof(Span);
}
struct Bucket {
Span *span;
size_t index;
@ -1318,7 +1320,7 @@ public:
float load_factor() const noexcept { return d ? d->loadFactor() : 0; }
static float max_load_factor() noexcept { return 0.5; }
size_t bucket_count() const noexcept { return d ? d->numBuckets : 0; }
static size_t max_bucket_count() noexcept { return QHashPrivate::GrowthPolicy::maxNumBuckets(); }
static size_t max_bucket_count() noexcept { return Data::maxNumBuckets(); }
inline bool empty() const noexcept { return isEmpty(); }
@ -1948,7 +1950,7 @@ public:
float load_factor() const noexcept { return d ? d->loadFactor() : 0; }
static float max_load_factor() noexcept { return 0.5; }
size_t bucket_count() const noexcept { return d ? d->numBuckets : 0; }
static size_t max_bucket_count() noexcept { return QHashPrivate::GrowthPolicy::maxNumBuckets(); }
static size_t max_bucket_count() noexcept { return Data::maxNumBuckets(); }
inline bool empty() const noexcept { return isEmpty(); }

View File

@ -76,6 +76,8 @@ private slots:
void reserveShared();
void reserveLessThanCurrentAmount();
void reserveKeepCapacity_data();
void reserveKeepCapacity();
void QTBUG98265();
@ -2693,6 +2695,40 @@ void tst_QHash::reserveLessThanCurrentAmount()
}
}
void tst_QHash::reserveKeepCapacity_data()
{
QTest::addColumn<qsizetype>("requested");
auto addRow = [](qsizetype requested) {
QTest::addRow("%td", ptrdiff_t(requested)) << requested;
};
QHash<int, int> testHash = {{1, 1}};
qsizetype minCapacity = testHash.capacity();
addRow(minCapacity - 1);
addRow(minCapacity + 0);
addRow(minCapacity + 1);
addRow(2 * minCapacity - 1);
addRow(2 * minCapacity + 0);
addRow(2 * minCapacity + 1);
}
void tst_QHash::reserveKeepCapacity()
{
QFETCH(qsizetype, requested);
QHash<qsizetype, qsizetype> hash;
hash.reserve(requested);
qsizetype initialCapacity = hash.capacity();
QCOMPARE_GE(initialCapacity, requested);
// insert this many elements into the hash
for (qsizetype i = 0; i < requested; ++i)
hash.insert(i, i);
// it mustn't have increased capacity after inserting the elements
QCOMPARE(hash.capacity(), initialCapacity);
}
void tst_QHash::QTBUG98265()
{
QMultiHash<QUuid, QByteArray> a;

View File

@ -232,27 +232,39 @@ void tst_QSet::squeeze()
set.squeeze();
QVERIFY(set.capacity() < 100);
for (int i = 0; i < 512; ++i)
for (int i = 0; i < 500; ++i)
set.insert(i);
QVERIFY(set.capacity() == 512);
QCOMPARE(set.size(), 500);
// squeezed capacity for 500 elements
qsizetype capacity = set.capacity(); // current implementation: 512
QCOMPARE_GE(capacity, set.size());
set.reserve(50000);
QVERIFY(set.capacity() >= 50000);
QVERIFY(set.capacity() >= 50000); // current implementation: 65536
set.squeeze();
QVERIFY(set.capacity() == 512);
QCOMPARE(set.capacity(), capacity);
// removing elements does not shed capacity
set.remove(499);
QVERIFY(set.capacity() == 512);
QCOMPARE(set.capacity(), capacity);
set.insert(499);
QVERIFY(set.capacity() == 512);
QCOMPARE(set.capacity(), capacity);
set.insert(1000);
QVERIFY(set.capacity() == 1024);
// grow it beyond the current capacity
for (int i = set.size(); i <= capacity; ++i)
set.insert(i);
QCOMPARE(set.size(), capacity + 1);
QCOMPARE_GT(set.capacity(), capacity + 1);// current implementation: 2 * capacity (1024)
for (int i = 0; i < 500; ++i)
set.remove(i);
// removing elements does not shed capacity
QCOMPARE_GT(set.capacity(), capacity + 1);
set.squeeze();
QVERIFY(set.capacity() < 100);
}