Revert "Cleanup QSemaphore and make it always 64bit large"

This reverts commit ff69227a49119c00f703cf89c9d72db7eeeeaa66.

Reason for revert: 64-bit atomics on 32-bit systems are often
(but not always) worse than the 32-bit semaphore as it was
implemented. Plus the High32 and Low32 functions are returning
the same thing, forgetting the endianness check.

Change-Id: I5d5ade6e9bc7086600ff2302546385151e32142b
Reviewed-by: Volker Hilsheimer <volker.hilsheimer@qt.io>
This commit is contained in:
Thiago Macieira 2020-09-06 21:58:36 +00:00
parent e33d40a884
commit 475464906c
2 changed files with 101 additions and 57 deletions

View File

@ -126,19 +126,29 @@ using namespace QtFutex;
unspecified, but it's likely single-token threads will get woken up first. unspecified, but it's likely single-token threads will get woken up first.
*/ */
static const quint64 futexNeedsWakeAllBit = #if defined(FUTEX_OP) && QT_POINTER_SIZE > 4
Q_UINT64_C(1) << (sizeof(quint64) * CHAR_BIT - 1); static constexpr bool futexHasWaiterCount = true;
#else
static constexpr bool futexHasWaiterCount = false;
#endif
static int futexAvailCounter(quint64 v) static const quintptr futexNeedsWakeAllBit =
Q_UINT64_C(1) << (sizeof(quintptr) * CHAR_BIT - 1);
static int futexAvailCounter(quintptr v)
{ {
// the low 31 bits
if (futexHasWaiterCount) {
// the high bit of the low word isn't used // the high bit of the low word isn't used
Q_ASSERT((v & 0x80000000U) == 0); Q_ASSERT((v & 0x80000000U) == 0);
// so we can be a little faster // so we can be a little faster
return int(unsigned(v)); return int(unsigned(v));
} }
return int(v & 0x7fffffffU);
}
static bool futexNeedsWake(quint64 v) static bool futexNeedsWake(quintptr v)
{ {
// If we're counting waiters, the number of waiters is stored in the low 31 // If we're counting waiters, the number of waiters is stored in the low 31
// bits of the high word (that is, bits 32-62). If we're not, then we use // bits of the high word (that is, bits 32-62). If we're not, then we use
@ -147,22 +157,26 @@ static bool futexNeedsWake(quint64 v)
return v >> 31; return v >> 31;
} }
static QBasicAtomicInteger<quint32> *futexLow32(QBasicAtomicInteger<quint64> *ptr) static QBasicAtomicInteger<quint32> *futexLow32(QBasicAtomicInteger<quintptr> *ptr)
{ {
auto result = reinterpret_cast<QBasicAtomicInteger<quint32> *>(ptr); auto result = reinterpret_cast<QBasicAtomicInteger<quint32> *>(ptr);
#if Q_BYTE_ORDER == Q_BIG_ENDIAN && QT_POINTER_SIZE > 4
++result; ++result;
#endif
return result; return result;
} }
static QBasicAtomicInteger<quint32> *futexHigh32(QBasicAtomicInteger<quint64> *ptr) static QBasicAtomicInteger<quint32> *futexHigh32(QBasicAtomicInteger<quintptr> *ptr)
{ {
auto result = reinterpret_cast<QBasicAtomicInteger<quint32> *>(ptr); auto result = reinterpret_cast<QBasicAtomicInteger<quint32> *>(ptr);
#if Q_BYTE_ORDER == Q_LITTLE_ENDIAN && QT_POINTER_SIZE > 4
++result; ++result;
#endif
return result; return result;
} }
template <bool IsTimed> bool template <bool IsTimed> bool
futexSemaphoreTryAcquire_loop(QBasicAtomicInteger<quint64> &u, quint64 curValue, quint64 nn, int timeout) futexSemaphoreTryAcquire_loop(QBasicAtomicInteger<quintptr> &u, quintptr curValue, quintptr nn, int timeout)
{ {
QDeadlineTimer timer(IsTimed ? QDeadlineTimer(timeout) : QDeadlineTimer()); QDeadlineTimer timer(IsTimed ? QDeadlineTimer(timeout) : QDeadlineTimer());
qint64 remainingTime = timeout * Q_INT64_C(1000) * 1000; qint64 remainingTime = timeout * Q_INT64_C(1000) * 1000;
@ -174,7 +188,7 @@ futexSemaphoreTryAcquire_loop(QBasicAtomicInteger<quint64> &u, quint64 curValue,
forever { forever {
if (futexAvailCounter(curValue) >= n) { if (futexAvailCounter(curValue) >= n) {
// try to acquire // try to acquire
quint64 newValue = curValue - nn; quintptr newValue = curValue - nn;
if (u.testAndSetOrdered(curValue, newValue, curValue)) if (u.testAndSetOrdered(curValue, newValue, curValue))
return true; // succeeded! return true; // succeeded!
continue; continue;
@ -187,11 +201,14 @@ futexSemaphoreTryAcquire_loop(QBasicAtomicInteger<quint64> &u, quint64 curValue,
// indicate we're waiting // indicate we're waiting
start_wait: start_wait:
auto ptr = futexLow32(&u); auto ptr = futexLow32(&u);
if (n > 1) { if (n > 1 || !futexHasWaiterCount) {
u.fetchAndOrRelaxed(futexNeedsWakeAllBit); u.fetchAndOrRelaxed(futexNeedsWakeAllBit);
curValue |= futexNeedsWakeAllBit; curValue |= futexNeedsWakeAllBit;
if (n > 1 && futexHasWaiterCount) {
ptr = futexHigh32(&u); ptr = futexHigh32(&u);
curValue >>= 32; //curValue >>= 32; // but this is UB in 32-bit, so roundabout:
curValue = quint64(curValue) >> 32;
}
} }
if (IsTimed && remainingTime > 0) { if (IsTimed && remainingTime > 0) {
@ -208,17 +225,18 @@ start_wait:
} }
} }
template <bool IsTimed> bool futexSemaphoreTryAcquire(QBasicAtomicInteger<quint64> &u, int n, int timeout) template <bool IsTimed> bool futexSemaphoreTryAcquire(QBasicAtomicInteger<quintptr> &u, int n, int timeout)
{ {
// Try to acquire without waiting (we still loop because the testAndSet // Try to acquire without waiting (we still loop because the testAndSet
// call can fail). // call can fail).
quint64 nn = unsigned(n); quintptr nn = unsigned(n);
if (futexHasWaiterCount)
nn |= quint64(nn) << 32; // token count replicated in high word nn |= quint64(nn) << 32; // token count replicated in high word
quint64 curValue = u.loadAcquire(); quintptr curValue = u.loadAcquire();
while (futexAvailCounter(curValue) >= n) { while (futexAvailCounter(curValue) >= n) {
// try to acquire // try to acquire
quint64 newValue = curValue - nn; quintptr newValue = curValue - nn;
if (u.testAndSetOrdered(curValue, newValue, curValue)) if (u.testAndSetOrdered(curValue, newValue, curValue))
return true; // succeeded! return true; // succeeded!
} }
@ -226,7 +244,8 @@ template <bool IsTimed> bool futexSemaphoreTryAcquire(QBasicAtomicInteger<quint6
return false; return false;
// we need to wait // we need to wait
quint64 oneWaiter = quint64(Q_UINT64_C(1) << 32); // zero on 32-bit quintptr oneWaiter = quintptr(Q_UINT64_C(1) << 32); // zero on 32-bit
if (futexHasWaiterCount) {
// increase the waiter count // increase the waiter count
u.fetchAndAddRelaxed(oneWaiter); u.fetchAndAddRelaxed(oneWaiter);
@ -238,13 +257,16 @@ template <bool IsTimed> bool futexSemaphoreTryAcquire(QBasicAtomicInteger<quint6
// Also adjust nn to subtract oneWaiter when we succeed in acquiring. // Also adjust nn to subtract oneWaiter when we succeed in acquiring.
nn += oneWaiter; nn += oneWaiter;
}
if (futexSemaphoreTryAcquire_loop<IsTimed>(u, curValue, nn, timeout)) if (futexSemaphoreTryAcquire_loop<IsTimed>(u, curValue, nn, timeout))
return true; return true;
if (futexHasWaiterCount) {
// decrement the number of threads waiting // decrement the number of threads waiting
Q_ASSERT(futexHigh32(&u)->loadRelaxed() & 0x7fffffffU); Q_ASSERT(futexHigh32(&u)->loadRelaxed() & 0x7fffffffU);
u.fetchAndSubRelaxed(oneWaiter); u.fetchAndSubRelaxed(oneWaiter);
}
return false; return false;
} }
@ -268,8 +290,9 @@ QSemaphore::QSemaphore(int n)
{ {
Q_ASSERT_X(n >= 0, "QSemaphore", "parameter 'n' must be non-negative"); Q_ASSERT_X(n >= 0, "QSemaphore", "parameter 'n' must be non-negative");
if (futexAvailable()) { if (futexAvailable()) {
quint64 nn = unsigned(n); quintptr nn = unsigned(n);
nn |= nn << 32; // token count replicated in high word if (futexHasWaiterCount)
nn |= quint64(nn) << 32; // token count replicated in high word
u.storeRelaxed(nn); u.storeRelaxed(nn);
} else { } else {
d = new QSemaphorePrivate(n); d = new QSemaphorePrivate(n);
@ -328,13 +351,33 @@ void QSemaphore::release(int n)
Q_ASSERT_X(n >= 0, "QSemaphore::release", "parameter 'n' must be non-negative"); Q_ASSERT_X(n >= 0, "QSemaphore::release", "parameter 'n' must be non-negative");
if (futexAvailable()) { if (futexAvailable()) {
quint64 nn = unsigned(n); quintptr nn = unsigned(n);
nn |= nn << 32; // token count replicated in high word if (futexHasWaiterCount)
quint64 prevValue = u.fetchAndAddRelease(nn); nn |= quint64(nn) << 32; // token count replicated in high word
quintptr prevValue = u.fetchAndAddRelease(nn);
if (futexNeedsWake(prevValue)) { if (futexNeedsWake(prevValue)) {
#ifdef FUTEX_OP #ifdef FUTEX_OP
if (!futexHasWaiterCount) {
/* /*
The single-token waiters wait on the low half On 32-bit systems, all waiters are waiting on the same address,
so we'll wake them all and ask the kernel to clear the high bit.
atomic {
int oldval = u;
u = oldval & ~(1 << 31);
futexWake(u, INT_MAX);
if (oldval == 0) // impossible condition
futexWake(u, INT_MAX);
}
*/
quint32 op = FUTEX_OP_ANDN | FUTEX_OP_OPARG_SHIFT;
quint32 oparg = 31;
quint32 cmp = FUTEX_OP_CMP_EQ;
quint32 cmparg = 0;
futexWakeOp(u, INT_MAX, INT_MAX, u, FUTEX_OP(op, oparg, cmp, cmparg));
} else {
/*
On 64-bit systems, the single-token waiters wait on the low half
and the multi-token waiters wait on the upper half. So we ask and the multi-token waiters wait on the upper half. So we ask
the kernel to wake up n single-token waiters and all multi-token the kernel to wake up n single-token waiters and all multi-token
waiters (if any), then clear the multi-token wait bit. waiters (if any), then clear the multi-token wait bit.
@ -352,6 +395,7 @@ void QSemaphore::release(int n)
quint32 cmp = FUTEX_OP_CMP_LT; quint32 cmp = FUTEX_OP_CMP_LT;
quint32 cmparg = 0; quint32 cmparg = 0;
futexWakeOp(*futexLow32(&u), n, INT_MAX, *futexHigh32(&u), FUTEX_OP(op, oparg, cmp, cmparg)); futexWakeOp(*futexLow32(&u), n, INT_MAX, *futexHigh32(&u), FUTEX_OP(op, oparg, cmp, cmparg));
}
#else #else
// Unset the bit and wake everyone. There are two possibibilies // Unset the bit and wake everyone. There are two possibibilies
// under which a thread can set the bit between the AND and the // under which a thread can set the bit between the AND and the

View File

@ -67,7 +67,7 @@ private:
union { union {
QSemaphorePrivate *d; QSemaphorePrivate *d;
QBasicAtomicInteger<quint64> u; QBasicAtomicInteger<quintptr> u; // ### Qt6: make 64-bit
}; };
}; };